added ml, questions submit

This commit is contained in:
Alexander Karpov 2023-08-26 09:36:47 +03:00
parent 2cc1736951
commit d784e04ff5
14 changed files with 481 additions and 57 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
!.gitkeep
.idea
### Python template
# Byte-compiled / optimized / DLL files

View File

@ -351,4 +351,4 @@
],
}
OPENAI_KEY = env("OPENAI_KEY")
OPENAI_KEY = env("OPENAI_KEY", default="")

BIN
ml/data.pic Normal file

Binary file not shown.

BIN
ml/index.ann Normal file

Binary file not shown.

View File

@ -33,38 +33,37 @@
prompts = [
"""
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
'users': 'Кто будет пользоваться продуктом',
'problems': 'Какие проблемы решает продукт',
'actuality': 'Продолжите предложение: Актуальность проблемы подтверждается тем фактом, что...',
'solve': 'Как решаем эти проблемы',
'works': 'Как работает решение',
}
""",
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
'users': 'Кто будет пользоваться продуктом',
'problems': 'Какие проблемы решает продукт',
'actuality': 'Продолжите предложение: Актуальность проблемы подтверждается тем фактом, что...',
'solve': 'Как решаем эти проблемы',
'works': 'Как работает решение',
}
""",
"""
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
'awards': 'Когда проблема будет решена, какова будет ценность для ваших пользователей',
'money': 'На чем проект зарабатывает? сколько и за что ему платят клиенты',
'aims': Напиши 3 цели: на месяц, на полгода и год, формат: {'1': цель на месяц, '2': цель на полгода, '3': цель на год},
'investments_sold': 'На что потратить инвестиции под проект',
'financial_indicators': 'Напиши финансовые показатели проекта'
}
""",
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
'awards': 'Когда проблема будет решена, какова будет ценность для ваших пользователей',
'money': 'На чем проект зарабатывает? сколько и за что ему платят клиенты',
'aims': Напиши 3 цели: на месяц, на полгода и год, формат: {'1': цель на месяц, '2': цель на полгода, '3': цель на год},
'investments_sold': 'На что потратить инвестиции под проект',
'financial_indicators': 'Напиши финансовые показатели проекта'
}
""",
"""
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
По тексту ответь или предположи ответ на вопросы в следющем формате:
{
'achieve': 'Чего добьется команда после освоения инвестиций',
'competitors_strength': 'Сильные стороны конкурентов',
'competitors_low': 'Слабые стороны конкурентов',
'advantages': 'Какие могут быть преимущества над конкурентами'
}
""",
'achieve': 'Чего добьется команда после освоения инвестиций',
'competitors_strength': 'Сильные стороны конкурентов',
'competitors_low': 'Слабые стороны конкурентов',
'advantages': 'Какие могут быть преимущества над конкурентами'
}
""",
]
openai.api_key = KEY
assertions = [
@ -105,15 +104,21 @@ def create_hints(description: str, stage: int):
content["aims"] = [
{
"aim": content["aims"]["1"],
"date": datetime.datetime.now() + datetime.timedelta(days=30),
"date": (
datetime.datetime.now() + datetime.timedelta(days=30)
).isoformat(),
},
{
"aim": content["aims"]["2"],
"date": datetime.datetime.now() + datetime.timedelta(days=180),
"date": (
datetime.datetime.now() + datetime.timedelta(days=180)
).isoformat(),
},
{
"aim": content["aims"]["3"],
"date": datetime.datetime.now() + datetime.timedelta(days=365),
"date": (
datetime.datetime.now() + datetime.timedelta(days=365)
).isoformat(),
},
]
result = []

49
ml/search.py Normal file
View File

@ -0,0 +1,49 @@
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
import pickle
model = None
data = None
index = None
def get_model():
    """Return the shared LaBSE sentence-embedding model, loading it on first use.

    The loaded model is cached in the module-level ``model`` variable, so
    later calls return the same object without loading it again.
    """
    global model
    # Test against the ``None`` sentinel explicitly rather than relying on the
    # truthiness of the loaded model object.
    if model is None:
        model = SentenceTransformer("sentence-transformers/LaBSE")
    return model
def get_data():
    """Return the unpickled contents of ``ml/data.pic``, loading them on first use.

    The result is cached in the module-level ``data`` variable.  The cache is
    checked against ``None`` so that an empty (falsy) dataset is not re-read
    from disk on every call.
    """
    global data
    if data is None:
        # NOTE(review): ``pickle.load`` can execute arbitrary code embedded in
        # the file; ``ml/data.pic`` must only ever come from a trusted source.
        with open("ml/data.pic", "rb") as file:
            data = pickle.load(file)
        print(len(data))
    return data
def get_index():
    """Return the shared Annoy index loaded from ``ml/index.ann``.

    The index is created with 768 dimensions and the ``"angular"`` metric and
    cached in the module-level ``index`` variable.  The global is assigned
    only after ``load`` has returned, so a failed load does not leave an
    unloaded index cached for subsequent calls.
    """
    global index
    if index is None:
        loaded = AnnoyIndex(768, "angular")
        loaded.load("ml/index.ann")
        index = loaded
    return index
def search(search_string, top_k=5):
    """Return the stored records nearest to ``search_string``.

    The string is encoded with the model from ``get_model()``, the nearest
    neighbours are looked up in the index from ``get_index()``, and the
    matching entries of ``get_data()`` are returned.

    Args:
        search_string: Text to search for.
        top_k: Number of nearest neighbours to request (defaults to 5).

    Returns:
        A list of dicts with the keys ``"logo"``, ``"name"`` and
        ``"description"``, in the order the index returned the neighbours.
    """
    embedding = get_model().encode([search_string])[0]
    neighbour_ids = get_index().get_nns_by_vector(embedding, top_k)
    records = get_data()
    return [
        {
            # The stored record calls the picture "image"; callers get "logo".
            "logo": records[i]["image"],
            "name": records[i]["name"],
            "description": records[i]["description"],
        }
        for i in neighbour_ids
    ]

View File

@ -1,7 +1,16 @@
from dateutil.parser import parse
from django.core.files.uploadedfile import InMemoryUploadedFile, TemporaryUploadedFile
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
from rest_framework.generics import get_object_or_404
from pitch_deck_generator.decks.models import PitchDeck, Question, QuestionDeckHint
from pitch_deck_generator.decks.models import (
PitchDeck,
Question,
QuestionDeckHint,
QuestionAnswer,
QuestionAnswerPhoto,
)
class BasePitchDeckSerializer(serializers.ModelSerializer):
@ -35,12 +44,17 @@ class QuestionSerializer(serializers.ModelSerializer):
@extend_schema_field(HintSerializer)
def get_hint(self, obj):
if obj.hint:
question_id = (
self.context["view"].kwargs["question_id"]
if "question_id" in self.context["view"].kwargs
else 1
)
if q := QuestionDeckHint.objects.filter(
question_id=self.context["kwargs"]["question"],
deck_id=self.context["kwargs"]["deck"],
question_id=question_id,
deck_id=self.context["view"].kwargs["deck_id"],
):
return q.first().hint
return ""
return {}
return False
@extend_schema_field(serializers.IntegerField)
@ -51,4 +65,165 @@ def get_next_id(self, obj):
class Meta:
model = Question
fields = ["id", "text", "hint", "required", "params", "next_id"]
fields = ["id", "text", "hint", "required", "type", "params", "next_id"]
class AnswerSerializer(serializers.ModelSerializer):
def __init__(self, *args, **kwargs):
file_fields = kwargs.pop("file_fields", None)
super().__init__(*args, **kwargs)
if file_fields:
field_update_dict = {
field: serializers.FileField(required=False, write_only=True)
for field in file_fields
}
self.fields.update(**field_update_dict)
file = serializers.FileField(allow_null=True, required=False)
class Meta:
model = QuestionAnswer
fields = ["answer", "deck", "question", "file"]
extra_kwargs = {
"deck": {"read_only": True},
"question": {"read_only": True},
}
def validate(self, data):
answer = data["answer"]
question = get_object_or_404(
Question, id=self.context["view"].kwargs["question_id"]
)
deck = get_object_or_404(PitchDeck, id=self.context["view"].kwargs["deck_id"])
question_type = question.type
params = question.params if question.params else {}
match question_type:
case "text":
if type(answer) is not str:
raise serializers.ValidationError("Incorrect type")
if "max_length" in params:
if len(answer) > params["max_length"]:
raise serializers.ValidationError("Text is too long")
case "number":
if type(answer) is not str:
raise serializers.ValidationError("Incorrect type")
case "text_array":
if type(answer) is not list:
raise serializers.ValidationError("Incorrect type")
if any([type(x) is not str for x in answer]):
raise serializers.ValidationError("Incorrect type")
case "range":
slug = params["slug"]
if slug not in answer:
raise serializers.ValidationError("Value to found")
if not isinstance(answer[slug], (int, float)):
raise serializers.ValidationError("Incorrect type")
if not (params["min_value"] <= answer[slug] <= params["max_value"]):
raise serializers.ValidationError("Number is too big or too small")
case "multiple_range":
for slug in [x["slug"] for x in params["scrollbars"]]:
if slug not in answer:
raise serializers.ValidationError(f"Value {slug} to found")
if not isinstance(answer[slug], (int, float)):
raise serializers.ValidationError(f"Incorrect {slug} type")
if not (params["min_value"] <= answer[slug] <= params["max_value"]):
raise serializers.ValidationError(
f"Number is too big or too small for {slug}"
)
case "select":
if answer not in params["options"]:
raise serializers.ValidationError("No such option")
case "date":
try:
parse(answer)
except ValueError:
raise serializers.ValidationError("Incorrect date type")
case "photo":
if answer:
raise serializers.ValidationError("Answer should be blank")
if not data["file"]:
raise serializers.ValidationError("No file found")
case "multiple_photo":
if answer:
raise serializers.ValidationError("Answer should be blank")
for key, value in data.items():
if isinstance(value, InMemoryUploadedFile):
if "_" not in key:
raise serializers.ValidationError(
"You should use file_num for file keys"
)
try:
int(key.split("_")[1])
except ValueError:
raise serializers.ValidationError(
"You should use file_num for file keys"
)
case "photo_description":
if not data["file"]:
raise serializers.ValidationError("No file found")
if type(answer) is not str:
raise serializers.ValidationError("Incorrect type")
if "max_length" in params:
if len(answer) > params["max_length"]:
raise serializers.ValidationError("Text is too long")
case "multiple_photo_description":
if type(answer) is not list:
raise serializers.ValidationError("Incorrect type")
if any([type(x) is not str for x in answer]):
raise serializers.ValidationError("Incorrect type")
len_f = 0
for key, value in data.items():
if isinstance(value, TemporaryUploadedFile):
if "_" not in key:
raise serializers.ValidationError(
"You should use file_num for file keys"
)
try:
int(key.split("_")[1])
except ValueError:
raise serializers.ValidationError(
"You should use file_num for file keys"
)
len_f += 1
if len_f != len(answer):
raise serializers.ValidationError(
"You should provide the same amount of answers in list as photos"
)
case "multiple_link_description":
if type(answer) is not dict:
raise serializers.ValidationError("Incorrect type")
if any([type(x) is not str for x in answer.keys()]):
raise serializers.ValidationError("Incorrect type")
if any([type(x) is not str for x in answer.values()]):
raise serializers.ValidationError("Incorrect type")
data["question_id"] = question.id
data["deck_id"] = deck.id
return data
def create(self, validated_data):
q = QuestionAnswer.objects.get_or_create(
deck_id=validated_data["deck_id"], question_id=validated_data["question_id"]
)[0]
q.answer = validated_data["answer"]
q.save()
s = [
key
for key, val in validated_data.items()
if isinstance(val, TemporaryUploadedFile) and key != "file"
]
if "file" in validated_data:
QuestionAnswerPhoto.objects.create(answer=q, file=validated_data["file"])
elif s:
s.sort(key=lambda x: int(x.split("_")[1]))
for key in s:
QuestionAnswerPhoto.objects.create(answer=q, file=validated_data[key])
return q

View File

@ -5,7 +5,7 @@
RetrievePitchApiView,
GetFirstQuestionApiView,
GetDeckQuestionApiView,
GetDeckQuestionHintApiView,
GetDeckQuestionHintApiView, CreateQuestionAnswerApiView,
)
app_name = "decks"
@ -15,5 +15,6 @@
path("<int:id>", RetrievePitchApiView.as_view()),
path("question/<int:deck_id>", GetFirstQuestionApiView.as_view()),
path("question/<int:deck_id>/<int:question_id>", GetDeckQuestionApiView.as_view()),
path("question/<int:deck_id>/<int:question_id>/", CreateQuestionAnswerApiView.as_view()),
path("hint/<int:deck_id>/<int:question_id>", GetDeckQuestionHintApiView.as_view()),
]

View File

@ -1,13 +1,16 @@
from rest_framework import generics
from rest_framework import generics, status
from rest_framework.generics import get_object_or_404
from rest_framework.response import Response
from rest_framework.parsers import JSONParser, FormParser, MultiPartParser
from pitch_deck_generator.decks.api.serializers import (
BasePitchDeckSerializer,
PitchDeckSerializer,
QuestionSerializer, HintSerializer,
QuestionSerializer,
HintSerializer,
AnswerSerializer,
)
from pitch_deck_generator.decks.models import PitchDeck
from pitch_deck_generator.decks.models import PitchDeck, QuestionDeckHint, Question
class ListDecksApiView(generics.ListCreateAPIView):
@ -22,22 +25,48 @@ def get_object(self):
return get_object_or_404(PitchDeck, id=self.kwargs["id"])
class GetFirstQuestionApiView(generics.GenericAPIView):
class GetFirstQuestionApiView(generics.RetrieveAPIView):
serializer_class = QuestionSerializer
def get(self, request, *args, **kwargs):
return Response()
def get_object(self):
return Question.objects.get(order=1)
class GetDeckQuestionApiView(generics.GenericAPIView):
class GetDeckQuestionApiView(generics.RetrieveAPIView):
serializer_class = QuestionSerializer
def get(self, request, *args, **kwargs):
return Response()
def get_object(self):
return get_object_or_404(Question, id=self.kwargs["question_id"])
class CreateQuestionAnswerApiView(generics.CreateAPIView):
    """Create endpoint that stores an answer through ``AnswerSerializer``."""

    serializer_class = AnswerSerializer

    def create(self, request, *args, **kwargs):
        """Validate and save the submitted answer, replying with HTTP 201.

        The names of all uploaded files are handed to the serializer as
        ``file_fields``.
        """
        uploaded_names = [*request.FILES.keys()]
        answer_serializer = self.get_serializer(
            data=request.data, file_fields=uploaded_names
        )
        answer_serializer.is_valid(raise_exception=True)
        self.perform_create(answer_serializer)
        return Response(
            answer_serializer.data,
            status=status.HTTP_201_CREATED,
            headers=self.get_success_headers(answer_serializer.data),
        )
class GetDeckQuestionHintApiView(generics.GenericAPIView):
serializer_class = HintSerializer
parser_classes = [JSONParser, FormParser, MultiPartParser]
def get(self, request, *args, **kwargs):
return Response()
hint = get_object_or_404(
QuestionDeckHint,
question_id=self.kwargs["question_id"],
deck_id=self.kwargs["deck_id"],
)
data = hint.hint
if data:
return Response(data)
return Response(status=404)

View File

@ -4,3 +4,9 @@
class DecksConfig(AppConfig):
    """Application configuration for ``pitch_deck_generator.decks``."""

    name = "pitch_deck_generator.decks"
    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Import the app's ``signals`` module when the app is ready."""
        try:
            from pitch_deck_generator.decks import signals  # noqa: F401
        except ImportError:
            # Best effort: an ImportError here is ignored on purpose.
            # NOTE(review): this also hides ImportErrors raised inside the
            # signals module itself -- confirm that is acceptable.
            pass

View File

@ -0,0 +1,44 @@
# Generated by Django 4.2.4 on 2023-08-26 00:22
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Alter ``Question.type`` and make ``QuestionAnswer.deck`` a foreign key."""

    dependencies = [("decks", "0006_alter_question_type")]

    # (stored value, human-readable label) pairs for ``Question.type``.
    _question_type_choices = [
        ("text", "Text"),
        ("number", "Number"),
        ("text_array", "text array"),
        ("range", "Range"),
        ("multiple_range", "multiple range"),
        ("select", "Select"),
        ("link", "Link"),
        ("date", "Date"),
        ("photo", "Photo"),
        ("multiple_photo", "multiple photo"),
        ("photo_description", "photo description"),
        ("multiple_link_description", "multiple link description"),
        ("multiple_photo_description", "multiple photo description"),
        ("multiple_links", "multiple links"),
    ]

    operations = [
        migrations.AlterField(
            model_name="question",
            name="type",
            field=models.CharField(choices=_question_type_choices, max_length=26),
        ),
        migrations.AlterField(
            model_name="questionanswer",
            name="deck",
            field=models.ForeignKey(
                to="decks.pitchdeck",
                on_delete=django.db.models.deletion.CASCADE,
            ),
        ),
    ]

View File

@ -26,18 +26,24 @@ class Question(models.Model):
class QuestionType(models.TextChoices):
text = "text"
number = "number"
text_array = "text_array"
text_array = "text_array", "text array"
range = "range"
multiple_range = "multiple_range"
multiple_range = "multiple_range", "multiple range"
select = "select"
link = "link"
date = "date"
photo = "photo"
multiple_photo = "multiple_photo"
photo_description = "photo_description"
multiple_link_description = "multiple link description"
multiple_photo_description = "multiple photo description"
multiple_links = "multiple_links"
multiple_photo = "multiple_photo", "multiple photo"
photo_description = "photo_description", "photo description"
multiple_link_description = (
"multiple_link_description",
"multiple link description",
)
multiple_photo_description = (
"multiple_photo_description",
"multiple photo description",
)
multiple_links = "multiple_links", "multiple links"
order = models.IntegerField(unique=True)
text = models.CharField(max_length=300)
@ -65,7 +71,7 @@ class QuestionDeckHint(models.Model):
class QuestionAnswer(models.Model):
deck = models.FloatField("PitchDeck")
deck = models.ForeignKey("PitchDeck", on_delete=models.CASCADE)
question = models.ForeignKey("Question", on_delete=models.CASCADE)
answer = models.JSONField(default=dict)

View File

@ -0,0 +1,11 @@
from django.db.models.signals import post_save
from django.dispatch import receiver
from pitch_deck_generator.decks.models import PitchDeck
from pitch_deck_generator.decks.tasks import run_pitch_deck_calculation
@receiver(post_save, sender=PitchDeck)
def tag_create(sender, instance: PitchDeck, created, **kwargs):
    """Schedule ``run_pitch_deck_calculation`` for a newly created pitch deck.

    The task is enqueued through ``transaction.on_commit`` so that a worker
    cannot pick it up before the row is committed and visible to it.  Outside
    a transaction the callback runs immediately, so behaviour is unchanged in
    autocommit mode.

    Args:
        sender: The model class that sent the signal.
        instance: The saved ``PitchDeck``.
        created: ``True`` only when the row was inserted by this save.
    """
    if not created:
        return

    from django.db import transaction

    # Capture the primary key now; the callback may run after this returns.
    pk = instance.pk
    transaction.on_commit(
        lambda: run_pitch_deck_calculation.apply_async(kwargs={"pk": pk})
    )

View File

@ -1,14 +1,52 @@
import requests
from celery import shared_task
from ml.openai_handle import create_name_hint, create_hints
from pitch_deck_generator.decks.models import PitchDeck, Question, QuestionDeckHint
# Maps a hint key to a ``(question type, question id)`` pair.  The tasks in
# this module look a key up here, store the first element as the hint's
# "type" and pass the second as ``question_id`` when creating a
# ``QuestionDeckHint``.
data_types = {
    "names": ("text", 1),
    "type": ("select", 13),
    "category": ("text", 14),
    "users": ("text", 2),
    "problems": ("text", 3),
    "actuality": ("text", 4),
    "solve": ("text", 5),
    "works": ("text", 6),
    "awards": ("text", 7),
    "market_values": ("multiple_range", 8),
    "percentage": ("multiple_range", 9),
    "project_stage": ("select", 10),
    "money": ("text", 11),
    "financial_indicators": ("text", 33),
    "users_metrics": ("multiple_range", 12),
    "aims": ("text", 15),
    # NOTE(review): the key below is misspelled ("recieved"); keys are looked
    # up by name at runtime, so renaming it needs a coordinated change.
    "money_recieved": ("number", 16),
    "past_investors": ("text", 17),
    "how_much_investments": ("range", 18),
    "finance_model": ("link", 19),
    "company_value": ("range", 20),
    "investments_sold": ("text", 21),
    "time_to_spend": ("date", 22),
    "achieve": ("text", 23),
    "future_value": ("range", 24),
    "your_role": ("photo_description", 25),
    "your_teammates": ("multiple_photo_description", 26),
    "competitors": ("text", 27),
    "competitors_strength": ("text", 28),
    "competitors_low": ("text", 29),
    "advantages": ("text", 30),
    "images": ("multiple_photo", 31),
    "links": ("multiple_link_description", 32),
}
@shared_task
def run_pitch_deck_calculation(pk: int):
generate_pitch_deck_name.apply_async(kwargs={"pk": pk})
generate_known_values.apply_async(kwargs={"pk": pk})
for i in range(3):
generate_pitch_deck_name.apply_async(kwargs={"pk": pk, "num": pk})
generate_batch_hints.apply_async(kwargs={"pk": pk, "num": i})
@shared_task
@ -23,8 +61,67 @@ def generate_pitch_deck_name(pk: int):
)
@shared_task
def generate_known_values(pk: int):
    """Create the hints that do not depend on the language model.

    Two ``QuestionDeckHint`` rows are created for the pitch deck ``pk``:

    * a ``"select"`` hint with the fixed list of categories for the
      ``"category"`` question;
    * a ``"cards"`` hint for the ``"competitors"`` question, holding the JSON
      returned by the search service for the deck description.

    Raises:
        PitchDeck.DoesNotExist: If no deck with this primary key exists.
        requests.RequestException: If the search request times out or the
            service answers with an HTTP error status.
    """
    pitch_deck = PitchDeck.objects.get(pk=pk)

    _, question_id = data_types["category"]
    QuestionDeckHint.objects.create(
        question_id=question_id,
        deck=pitch_deck,
        hint={
            "type": "select",
            "value": [
                "Business Software",
                "IndustrialTech",
                "E-commerce",
                "Advertising & Marketing",
                "Hardware",
                "RetailTech",
                "ConstructionTech",
                "Web3",
                "EdTech",
                "Business Intelligence",
                "Cybersecurity",
                "HrTech",
                "Telecom & Communication",
                "Media & Entertainment",
                "FinTech",
                "MedTech",
                "Transport & Logistics",
                "Gaming",
                "FoodTech",
                "AI",
                "WorkTech",
                "Consumer Goods & Services",
                "Aero & SpaceTech",
                "Legal & RegTech",
                "Travel",
                "PropTech",
                "Energy",
                "GreenTech",
            ],
        },
    )

    # NOTE(review): the service URL is hard-coded; consider moving it to settings.
    response = requests.post(
        "https://rare-needles-lead.loca.lt/search",
        json={"body": pitch_deck.description},
        # Without a timeout a stalled service would block the worker forever.
        timeout=(5, 60),
    )
    # Do not store an error page as the competitors hint.
    response.raise_for_status()

    _, question_id = data_types["competitors"]
    QuestionDeckHint.objects.create(
        question_id=question_id,
        deck=pitch_deck,
        hint={"type": "cards", "value": response.json()},
    )
@shared_task
def generate_batch_hints(pk: int, num: int):
    """Store one ``QuestionDeckHint`` per item returned by ``create_hints``.

    ``create_hints`` is called with the description of pitch deck ``pk`` and
    ``num``; each returned item's ``"type"`` is looked up in ``data_types`` to
    get the hint type and the target question id.
    """
    deck = PitchDeck.objects.get(pk=pk)
    hints = create_hints(deck.description, num)
    print(hints)
    for entry in hints:
        kind, target_question = data_types[entry["type"]]
        payload = {"type": kind, "value": entry["value"]}
        QuestionDeckHint.objects.create(
            question_id=target_question, deck=deck, hint=payload
        )