diff --git a/.gitignore b/.gitignore index 86f93cd..59d7913 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ !.gitkeep +.idea ### Python template # Byte-compiled / optimized / DLL files diff --git a/config/settings/base.py b/config/settings/base.py index bfd5c35..8b4532b 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -351,4 +351,4 @@ ], } -OPENAI_KEY = env("OPENAI_KEY") +OPENAI_KEY = env("OPENAI_KEY", default="") diff --git a/ml/data.pic b/ml/data.pic new file mode 100644 index 0000000..acebbd7 Binary files /dev/null and b/ml/data.pic differ diff --git a/ml/index.ann b/ml/index.ann new file mode 100644 index 0000000..af5c0ee Binary files /dev/null and b/ml/index.ann differ diff --git a/ml/openai_handle.py b/ml/openai_handle.py index 6a55bb3..52cfc3a 100644 --- a/ml/openai_handle.py +++ b/ml/openai_handle.py @@ -33,38 +33,37 @@ prompts = [ """ -По тексту ответь или предположи ответ на вопросы в следющем формате: -{ - 'users': 'Кто будет пользоваться продуктом', - 'problems': 'Какие проблемы решает продукт', - 'actuality': 'Продолжите предложение: Актуальность проблемы подтверждается тем фактом, что...', - 'solve': 'Как решаем эти проблемы', - 'works': 'Как работает решение', -} -""", + По тексту ответь или предположи ответ на вопросы в следющем формате: + { + 'users': 'Кто будет пользоваться продуктом', + 'problems': 'Какие проблемы решает продукт', + 'actuality': 'Продолжите предложение: Актуальность проблемы подтверждается тем фактом, что...', + 'solve': 'Как решаем эти проблемы', + 'works': 'Как работает решение', + } + """, """ -По тексту ответь или предположи ответ на вопросы в следющем формате: -{ - 'awards': 'Когда проблема будет решена, какова будет ценность для ваших пользователей', - 'money': 'На чем проект зарабатывает? сколько и за что ему платят клиенты', - 'aims': Напиши 3 цели: на месяц, на полгода и год, формат: {'1': цель на месяц, '2': цель на полгода, '3': цель на год}, - 'investments_sold': 'На что потратить инвестиции под проект', - 'financial_indicators': 'Напиши финансовые показатели проекта' -} -""", + По тексту ответь или предположи ответ на вопросы в следющем формате: + { + 'awards': 'Когда проблема будет решена, какова будет ценность для ваших пользователей', + 'money': 'На чем проект зарабатывает? сколько и за что ему платят клиенты', + 'aims': Напиши 3 цели: на месяц, на полгода и год, формат: {'1': цель на месяц, '2': цель на полгода, '3': цель на год}, + 'investments_sold': 'На что потратить инвестиции под проект', + 'financial_indicators': 'Напиши финансовые показатели проекта' + } + """, """ -По тексту ответь или предположи ответ на вопросы в следющем формате: -{ + По тексту ответь или предположи ответ на вопросы в следющем формате: + { - 'achieve': 'Чего добьется команда после освоения инвестиций', - 'competitors_strength': 'Сильные стороны конкурентов', - 'competitors_low': 'Слабые стороны конкурентов', - 'advantages': 'Какие могут быть преимущества над конкурентами' -} -""", + 'achieve': 'Чего добьется команда после освоения инвестиций', + 'competitors_strength': 'Сильные стороны конкурентов', + 'competitors_low': 'Слабые стороны конкурентов', + 'advantages': 'Какие могут быть преимущества над конкурентами' + } + """, ] - openai.api_key = KEY assertions = [ @@ -105,15 +104,21 @@ def create_hints(description: str, stage: int): content["aims"] = [ { "aim": content["aims"]["1"], - "date": datetime.datetime.now() + datetime.timedelta(days=30), + "date": ( + datetime.datetime.now() + datetime.timedelta(days=30) + ).isoformat(), }, { "aim": content["aims"]["2"], - "date": datetime.datetime.now() + datetime.timedelta(days=180), + "date": ( + datetime.datetime.now() + datetime.timedelta(days=180) + ).isoformat(), }, { "aim": content["aims"]["3"], - "date": datetime.datetime.now() + datetime.timedelta(days=365), + "date": ( + datetime.datetime.now() + datetime.timedelta(days=365) + ).isoformat(), }, ] result = [] diff --git a/ml/search.py b/ml/search.py new file mode 100644 index 0000000..831e920 --- /dev/null +++ b/ml/search.py @@ -0,0 +1,49 @@ +from annoy import AnnoyIndex +from sentence_transformers import SentenceTransformer +import pickle + +model = None +data = None +index = None + + +def get_model(): + global model + if not model: + model = SentenceTransformer("sentence-transformers/LaBSE") + return model + + +def get_data(): + global data + if not data: + with open("ml/data.pic", "rb") as file: + data = pickle.load(file) + print(len(data)) + return data + + +def get_index(): + global index + if not index: + index = AnnoyIndex(768, "angular") + index.load("ml/index.ann") + return index + + +def search(search_string): + embs = get_model().encode([search_string])[0] + indexes = get_index().get_nns_by_vector(embs, 5) + res = [] + for i in indexes: + res.append(get_data()[i]) + return list( + map( + lambda x: { + "logo": x["image"], + "name": x["name"], + "description": x["description"], + }, + res, + ) + ) diff --git a/pitch_deck_generator/decks/api/serializers.py b/pitch_deck_generator/decks/api/serializers.py index 3b0f774..8ef4665 100644 --- a/pitch_deck_generator/decks/api/serializers.py +++ b/pitch_deck_generator/decks/api/serializers.py @@ -1,7 +1,16 @@ +from dateutil.parser import parse +from django.core.files.uploadedfile import InMemoryUploadedFile, TemporaryUploadedFile from drf_spectacular.utils import extend_schema_field from rest_framework import serializers +from rest_framework.generics import get_object_or_404 -from pitch_deck_generator.decks.models import PitchDeck, Question, QuestionDeckHint +from pitch_deck_generator.decks.models import ( + PitchDeck, + Question, + QuestionDeckHint, + QuestionAnswer, + QuestionAnswerPhoto, +) class BasePitchDeckSerializer(serializers.ModelSerializer): @@ -35,12 +44,17 @@ class QuestionSerializer(serializers.ModelSerializer): @extend_schema_field(HintSerializer) def get_hint(self, obj): if obj.hint: + question_id = ( + self.context["view"].kwargs["question_id"] + if "question_id" in self.context["view"].kwargs + else 1 + ) if q := QuestionDeckHint.objects.filter( - question_id=self.context["kwargs"]["question"], - deck_id=self.context["kwargs"]["deck"], + question_id=question_id, + deck_id=self.context["view"].kwargs["deck_id"], ): return q.first().hint - return "" + return {} return False @extend_schema_field(serializers.IntegerField) @@ -51,4 +65,165 @@ def get_next_id(self, obj): class Meta: model = Question - fields = ["id", "text", "hint", "required", "params", "next_id"] + fields = ["id", "text", "hint", "required", "type", "params", "next_id"] + + +class AnswerSerializer(serializers.ModelSerializer): + def __init__(self, *args, **kwargs): + file_fields = kwargs.pop("file_fields", None) + super().__init__(*args, **kwargs) + if file_fields: + field_update_dict = { + field: serializers.FileField(required=False, write_only=True) + for field in file_fields + } + self.fields.update(**field_update_dict) + + file = serializers.FileField(allow_null=True, required=False) + + class Meta: + model = QuestionAnswer + fields = ["answer", "deck", "question", "file"] + extra_kwargs = { + "deck": {"read_only": True}, + "question": {"read_only": True}, + } + + def validate(self, data): + answer = data["answer"] + question = get_object_or_404( + Question, id=self.context["view"].kwargs["question_id"] + ) + deck = get_object_or_404(PitchDeck, id=self.context["view"].kwargs["deck_id"]) + question_type = question.type + params = question.params if question.params else {} + match question_type: + case "text": + if type(answer) is not str: + raise serializers.ValidationError("Incorrect type") + if "max_length" in params: + if len(answer) > params["max_length"]: + raise serializers.ValidationError("Text is too long") + case "number": + if type(answer) is not str: + raise serializers.ValidationError("Incorrect type") + case "text_array": + if type(answer) is not list: + raise serializers.ValidationError("Incorrect type") + + if any([type(x) is not str for x in answer]): + raise serializers.ValidationError("Incorrect type") + case "range": + slug = params["slug"] + if slug not in answer: + raise serializers.ValidationError("Value to found") + if not isinstance(answer[slug], (int, float)): + raise serializers.ValidationError("Incorrect type") + if not (params["min_value"] <= answer[slug] <= params["max_value"]): + raise serializers.ValidationError("Number is too big or too small") + case "multiple_range": + for slug in [x["slug"] for x in params["scrollbars"]]: + if slug not in answer: + raise serializers.ValidationError(f"Value {slug} to found") + if not isinstance(answer[slug], (int, float)): + raise serializers.ValidationError(f"Incorrect {slug} type") + if not (params["min_value"] <= answer[slug] <= params["max_value"]): + raise serializers.ValidationError( + f"Number is too big or too small for {slug}" + ) + case "select": + if answer not in params["options"]: + raise serializers.ValidationError("No such option") + case "date": + try: + parse(answer) + except ValueError: + raise serializers.ValidationError("Incorrect date type") + case "photo": + if answer: + raise serializers.ValidationError("Answer should be blank") + + if not data["file"]: + raise serializers.ValidationError("No file found") + case "multiple_photo": + if answer: + raise serializers.ValidationError("Answer should be blank") + for key, value in data.items(): + if isinstance(value, InMemoryUploadedFile): + if "_" not in key: + raise serializers.ValidationError( + "You should use file_num for file keys" + ) + try: + int(key.split("_")[1]) + except ValueError: + raise serializers.ValidationError( + "You should use file_num for file keys" + ) + + case "photo_description": + if not data["file"]: + raise serializers.ValidationError("No file found") + if type(answer) is not str: + raise serializers.ValidationError("Incorrect type") + if "max_length" in params: + if len(answer) > params["max_length"]: + raise serializers.ValidationError("Text is too long") + case "multiple_photo_description": + if type(answer) is not list: + raise serializers.ValidationError("Incorrect type") + + if any([type(x) is not str for x in answer]): + raise serializers.ValidationError("Incorrect type") + + len_f = 0 + + for key, value in data.items(): + if isinstance(value, TemporaryUploadedFile): + if "_" not in key: + raise serializers.ValidationError( + "You should use file_num for file keys" + ) + try: + int(key.split("_")[1]) + except ValueError: + raise serializers.ValidationError( + "You should use file_num for file keys" + ) + len_f += 1 + if len_f != len(answer): + raise serializers.ValidationError( + "You should provide the same amount of answers in list as photos" + ) + + case "multiple_link_description": + if type(answer) is not dict: + raise serializers.ValidationError("Incorrect type") + if any([type(x) is not str for x in answer.keys()]): + raise serializers.ValidationError("Incorrect type") + if any([type(x) is not str for x in answer.values()]): + raise serializers.ValidationError("Incorrect type") + + data["question_id"] = question.id + data["deck_id"] = deck.id + return data + + def create(self, validated_data): + q = QuestionAnswer.objects.get_or_create( + deck_id=validated_data["deck_id"], question_id=validated_data["question_id"] + )[0] + q.answer = validated_data["answer"] + q.save() + + s = [ + key + for key, val in validated_data.items() + if isinstance(val, TemporaryUploadedFile) and key != "file" + ] + if "file" in validated_data: + QuestionAnswerPhoto.objects.create(answer=q, file=validated_data["file"]) + elif s: + s.sort(key=lambda x: int(x.split("_")[1])) + for key in s: + QuestionAnswerPhoto.objects.create(answer=q, file=validated_data[key]) + return q diff --git a/pitch_deck_generator/decks/api/urls.py b/pitch_deck_generator/decks/api/urls.py index 1945e9f..087b454 100644 --- a/pitch_deck_generator/decks/api/urls.py +++ b/pitch_deck_generator/decks/api/urls.py @@ -5,7 +5,7 @@ RetrievePitchApiView, GetFirstQuestionApiView, GetDeckQuestionApiView, - GetDeckQuestionHintApiView, + GetDeckQuestionHintApiView, CreateQuestionAnswerApiView, ) app_name = "decks" @@ -15,5 +15,6 @@ path("", RetrievePitchApiView.as_view()), path("question/", GetFirstQuestionApiView.as_view()), path("question//", GetDeckQuestionApiView.as_view()), + path("question///", CreateQuestionAnswerApiView.as_view()), path("hint//", GetDeckQuestionHintApiView.as_view()), ] diff --git a/pitch_deck_generator/decks/api/views.py b/pitch_deck_generator/decks/api/views.py index d9028ad..4210474 100644 --- a/pitch_deck_generator/decks/api/views.py +++ b/pitch_deck_generator/decks/api/views.py @@ -1,13 +1,16 @@ -from rest_framework import generics +from rest_framework import generics, status from rest_framework.generics import get_object_or_404 from rest_framework.response import Response +from rest_framework.parsers import JSONParser, FormParser, MultiPartParser from pitch_deck_generator.decks.api.serializers import ( BasePitchDeckSerializer, PitchDeckSerializer, - QuestionSerializer, HintSerializer, + QuestionSerializer, + HintSerializer, + AnswerSerializer, ) -from pitch_deck_generator.decks.models import PitchDeck +from pitch_deck_generator.decks.models import PitchDeck, QuestionDeckHint, Question class ListDecksApiView(generics.ListCreateAPIView): @@ -22,22 +25,48 @@ def get_object(self): return get_object_or_404(PitchDeck, id=self.kwargs["id"]) -class GetFirstQuestionApiView(generics.GenericAPIView): +class GetFirstQuestionApiView(generics.RetrieveAPIView): serializer_class = QuestionSerializer - def get(self, request, *args, **kwargs): - return Response() + def get_object(self): + return Question.objects.get(order=1) -class GetDeckQuestionApiView(generics.GenericAPIView): +class GetDeckQuestionApiView(generics.RetrieveAPIView): serializer_class = QuestionSerializer - def get(self, request, *args, **kwargs): - return Response() + def get_object(self): + return get_object_or_404(Question, id=self.kwargs["question_id"]) + + +class CreateQuestionAnswerApiView(generics.CreateAPIView): + serializer_class = AnswerSerializer + + def create(self, request, *args, **kwargs): + # main thing starts + file_fields = list(request.FILES.keys()) # list to be passed to the serializer + serializer = self.get_serializer(data=request.data, file_fields=file_fields) + # main thing ends + + serializer.is_valid(raise_exception=True) + self.perform_create(serializer) + headers = self.get_success_headers(serializer.data) + return Response( + serializer.data, status=status.HTTP_201_CREATED, headers=headers + ) class GetDeckQuestionHintApiView(generics.GenericAPIView): serializer_class = HintSerializer + parser_classes = [JSONParser, FormParser, MultiPartParser] def get(self, request, *args, **kwargs): - return Response() + hint = get_object_or_404( + QuestionDeckHint, + question_id=self.kwargs["question_id"], + deck_id=self.kwargs["deck_id"], + ) + data = hint.hint + if data: + return Response(data) + return Response(status=404) diff --git a/pitch_deck_generator/decks/apps.py b/pitch_deck_generator/decks/apps.py index 8d2b5cb..67c5cfc 100644 --- a/pitch_deck_generator/decks/apps.py +++ b/pitch_deck_generator/decks/apps.py @@ -4,3 +4,9 @@ class DecksConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "pitch_deck_generator.decks" + + def ready(self): + try: + import pitch_deck_generator.decks.signals # noqa F401 + except ImportError: + pass diff --git a/pitch_deck_generator/decks/migrations/0007_alter_question_type_alter_questionanswer_deck.py b/pitch_deck_generator/decks/migrations/0007_alter_question_type_alter_questionanswer_deck.py new file mode 100644 index 0000000..f437642 --- /dev/null +++ b/pitch_deck_generator/decks/migrations/0007_alter_question_type_alter_questionanswer_deck.py @@ -0,0 +1,44 @@ +# Generated by Django 4.2.4 on 2023-08-26 00:22 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("decks", "0006_alter_question_type"), + ] + + operations = [ + migrations.AlterField( + model_name="question", + name="type", + field=models.CharField( + choices=[ + ("text", "Text"), + ("number", "Number"), + ("text_array", "text array"), + ("range", "Range"), + ("multiple_range", "multiple range"), + ("select", "Select"), + ("link", "Link"), + ("date", "Date"), + ("photo", "Photo"), + ("multiple_photo", "multiple photo"), + ("photo_description", "photo description"), + ("multiple_link_description", "multiple link description"), + ("multiple_photo_description", "multiple photo description"), + ("multiple_links", "multiple links"), + ], + max_length=26, + ), + ), + migrations.AlterField( + model_name="questionanswer", + name="deck", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="decks.pitchdeck" + ), + ), + ] diff --git a/pitch_deck_generator/decks/models.py b/pitch_deck_generator/decks/models.py index d76c953..4b0b762 100644 --- a/pitch_deck_generator/decks/models.py +++ b/pitch_deck_generator/decks/models.py @@ -26,18 +26,24 @@ class Question(models.Model): class QuestionType(models.TextChoices): text = "text" number = "number" - text_array = "text_array" + text_array = "text_array", "text array" range = "range" - multiple_range = "multiple_range" + multiple_range = "multiple_range", "multiple range" select = "select" link = "link" date = "date" photo = "photo" - multiple_photo = "multiple_photo" - photo_description = "photo_description" - multiple_link_description = "multiple link description" - multiple_photo_description = "multiple photo description" - multiple_links = "multiple_links" + multiple_photo = "multiple_photo", "multiple photo" + photo_description = "photo_description", "photo description" + multiple_link_description = ( + "multiple_link_description", + "multiple link description", + ) + multiple_photo_description = ( + "multiple_photo_description", + "multiple photo description", + ) + multiple_links = "multiple_links", "multiple links" order = models.IntegerField(unique=True) text = models.CharField(max_length=300) @@ -65,7 +71,7 @@ class QuestionDeckHint(models.Model): class QuestionAnswer(models.Model): - deck = models.FloatField("PitchDeck") + deck = models.ForeignKey("PitchDeck", on_delete=models.CASCADE) question = models.ForeignKey("Question", on_delete=models.CASCADE) answer = models.JSONField(default=dict) diff --git a/pitch_deck_generator/decks/signals.py b/pitch_deck_generator/decks/signals.py new file mode 100644 index 0000000..5d13bbb --- /dev/null +++ b/pitch_deck_generator/decks/signals.py @@ -0,0 +1,11 @@ +from django.db.models.signals import post_save +from django.dispatch import receiver + +from pitch_deck_generator.decks.models import PitchDeck +from pitch_deck_generator.decks.tasks import run_pitch_deck_calculation + + +@receiver(post_save, sender=PitchDeck) +def tag_create(sender, instance: PitchDeck, created, **kwargs): + if created: + run_pitch_deck_calculation.apply_async(kwargs={"pk": instance.pk}) diff --git a/pitch_deck_generator/decks/tasks.py b/pitch_deck_generator/decks/tasks.py index 5c8c428..4e86521 100644 --- a/pitch_deck_generator/decks/tasks.py +++ b/pitch_deck_generator/decks/tasks.py @@ -1,14 +1,52 @@ +import requests from celery import shared_task from ml.openai_handle import create_name_hint, create_hints from pitch_deck_generator.decks.models import PitchDeck, Question, QuestionDeckHint +data_types = { + "names": ("text", 1), + "type": ("select", 13), + "category": ("text", 14), + "users": ("text", 2), + "problems": ("text", 3), + "actuality": ("text", 4), + "solve": ("text", 5), + "works": ("text", 6), + "awards": ("text", 7), + "market_values": ("multiple_range", 8), + "percentage": ("multiple_range", 9), + "project_stage": ("select", 10), + "money": ("text", 11), + "financial_indicators": ("text", 33), + "users_metrics": ("multiple_range", 12), + "aims": ("text", 15), + "money_recieved": ("number", 16), + "past_investors": ("text", 17), + "how_much_investments": ("range", 18), + "finance_model": ("link", 19), + "company_value": ("range", 20), + "investments_sold": ("text", 21), + "time_to_spend": ("date", 22), + "achieve": ("text", 23), + "future_value": ("range", 24), + "your_role": ("photo_description", 25), + "your_teammates": ("multiple_photo_description", 26), + "competitors": ("text", 27), + "competitors_strength": ("text", 28), + "competitors_low": ("text", 29), + "advantages": ("text", 30), + "images": ("multiple_photo", 31), + "links": ("multiple_link_description", 32), +} + @shared_task def run_pitch_deck_calculation(pk: int): generate_pitch_deck_name.apply_async(kwargs={"pk": pk}) + generate_known_values.apply_async(kwargs={"pk": pk}) for i in range(3): - generate_pitch_deck_name.apply_async(kwargs={"pk": pk, "num": pk}) + generate_batch_hints.apply_async(kwargs={"pk": pk, "num": i}) @shared_task @@ -23,8 +61,67 @@ def generate_pitch_deck_name(pk: int): ) +@shared_task +def generate_known_values(pk: int): + pitch_deck = PitchDeck.objects.get(pk=pk) + _, question_id = data_types["category"] + QuestionDeckHint.objects.create( + question_id=question_id, + deck=pitch_deck, + hint={ + "type": "select", + "value": [ + "Business Software", + "IndustrialTech", + "E-commerce", + "Advertising & Marketing", + "Hardware", + "RetailTech", + "ConstructionTech", + "Web3", + "EdTech", + "Business Intelligence", + "Cybersecurity", + "HrTech", + "Telecom & Communication", + "Media & Entertainment", + "FinTech", + "MedTech", + "Transport & Logistics", + "Gaming", + "FoodTech", + "AI", + "WorkTech", + "Consumer Goods & Services", + "Aero & SpaceTech", + "Legal & RegTech", + "Travel", + "PropTech", + "Energy", + "GreenTech", + ], + }, + ) + + req = requests.post( + "https://rare-needles-lead.loca.lt/search", + json={"body": pitch_deck.description}, + ) + data = req.json() + _, question_id = data_types["competitors"] + QuestionDeckHint.objects.create( + question_id=question_id, deck=pitch_deck, hint={"type": "cards", "value": data} + ) + + @shared_task def generate_batch_hints(pk: int, num: int): pitch_deck = PitchDeck.objects.get(pk=pk) data = create_hints(pitch_deck.description, num) - print(data) + for el in data: + question_type, question_id = data_types[el["type"]] + QuestionDeckHint.objects.create( + question_id=question_id, + deck=pitch_deck, + hint={"type": question_type, "value": el["value"]}, + )