added async task worker, state save

2025-11-03 09:17:25 +03:00 · 2022-08-27 07:38:54 +03:00 · 2022-08-27 07:38:54 +03:00 · 477afd4278
commit 477afd4278
parent 0c1d881dff
11 changed files with 98 additions and 21 deletions
--- a/checker/api/serializers.py
+++ b/checker/api/serializers.py
@ -8,3 +8,9 @@ class DocxSerializer(serializers.ModelSerializer):
        model = Docx
        fields = ["uuid", "file"]
        extra_kwargs = {"uuid": {"read_only": True}}
 class DocxStateSerializer(serializers.ModelSerializer):
    # Serializes only the two paragraph counters of a Docx
    # (how many paragraphs were loaded and how many were processed).
    class Meta:
        model = Docx
        fields = ["paragraphs_loaded", "paragraphs_processed"]
--- a/checker/api/views.py
+++ b/checker/api/views.py
@ -1,11 +1,31 @@
 from rest_framework import generics
 from rest_framework.response import Response
 from rest_framework.views import APIView
 from rest_framework.generics import get_object_or_404
 from rest_framework.parsers import MultiPartParser, FormParser
-from checker.api.serializers import DocxSerializer
+from checker.api.serializers import DocxSerializer, DocxStateSerializer
-from checker.models import Docx
+from checker.models import Docx, ParagraphType
 class ListCreateDocxApiView(generics.ListCreateAPIView):
    # Lists every Docx and creates new ones through DocxSerializer.
    # Request bodies are parsed as form / multipart data only, which is what
    # a file upload requires.
    parser_classes = [FormParser, MultiPartParser]
    serializer_class = DocxSerializer
    queryset = Docx.objects.all()
 class GetDocxState(generics.RetrieveAPIView):
    # Retrieves a single Docx, looked up by its ``uuid`` URL keyword instead
    # of the default ``pk``, and renders it with DocxStateSerializer
    # (paragraph counters only).
    lookup_field = "uuid"
    queryset = Docx.objects.all()
    serializer_class = DocxStateSerializer
 class RetireDocxSerializer(APIView):
    # Read-only endpoint returning the paragraph texts of one document,
    # grouped by the name of their paragraph type.
    def get(self, request, uuid):
        """Return ``{type name: [paragraph text, ...]}`` for one Docx.

        Responds with 404 when no Docx has the given ``uuid``.
        """
        doc = get_object_or_404(Docx, uuid=uuid)
        # Filtering across the reverse relation joins one row per matching
        # paragraph, so without distinct() every type is yielded (and its
        # paragraphs re-queried) once per paragraph it owns.
        paragraph_types = ParagraphType.objects.filter(
            paragraphs__docx=doc
        ).distinct()
        res = {
            p_type.name: [x.text for x in p_type.paragraphs.filter(docx=doc)]
            for p_type in paragraph_types
        }
        return Response(res)
--- a/checker/models.py
+++ b/checker/models.py
@ -2,16 +2,24 @@ import uuid as uuid
 from django.db import models
 # Create your models here.
 from checker.services.file import media_upload_path
 class Docx(models.Model):
    # An uploaded document together with its processing progress counters.

    # Primary key: a random UUID generated on creation, not editable.
    uuid = models.UUIDField(
        default=uuid.uuid4, editable=False, unique=True, primary_key=True
    )
    # The uploaded file; its storage path is computed by media_upload_path.
-    file = models.FileField(upload_to="")
+    file = models.FileField(upload_to=media_upload_path)
    # Set once, when the row is first saved.
    created = models.DateTimeField(auto_now_add=True)
    # Progress counters, both starting at 0.
    paragraphs_processed = models.IntegerField(default=0)
    paragraphs_loaded = models.IntegerField(default=0)
    def __str__(self):
        # __str__ must return a str, so the UUID object is converted.
-        return self.uuid
+        return str(self.uuid)
    class Meta:
        # Newest documents first.
        ordering = ["-created"]
 class ParagraphType(models.Model):
--- a/checker/services/file.py
+++ b/checker/services/file.py
@ -1,3 +1,8 @@
 import os
 from checker.services.generators import generate_charset
 def process_paragraphs(text):
    paragraphs = {}
    c = 0
@ -13,3 +18,7 @@ def process_paragraphs(text):
            if c:
                paragraphs[c] += line
    return paragraphs
 def media_upload_path(instance, filename):
    """Return the storage path for an uploaded file.

    The file keeps its original ``filename`` but is placed in a directory
    named with 7 freshly generated characters under ``uploads/``.
    ``instance`` is accepted for the ``upload_to`` callable signature and
    is not used.
    """
    upload_dir = f"uploads/{generate_charset(7)}/"
    return os.path.join(upload_dir, filename)
--- a/checker/services/generators.py
+++ b/checker/services/generators.py
@ -0,0 +1,7 @@
 import random
 import string
 def generate_charset(length: int) -> str:
    """Build a random string made of ``length`` ASCII letters.

    Letters are drawn independently from ``string.ascii_letters`` (upper and
    lower case). A ``length`` of zero or less yields the empty string.
    """
    alphabet = string.ascii_letters
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
--- a/checker/signals.py
+++ b/checker/signals.py
@ -1,19 +1,18 @@
-from pprint import pprint
+import asyncio
 import requests
 import docx2txt
 from django.conf import settings
 from django.db.models.signals import post_save
 from django.dispatch import receiver
-from checker.models import Docx
+from checker.models import Docx, Paragraph
 from checker.services.file import process_paragraphs
 from checker.tasks import process_file
 import threading
 import asyncio
@receiver(post_save, sender=Docx)
def create_docs(sender, instance, created, **kwargs):
    """Queue background processing for a newly created Docx.

    Runs on every ``post_save`` of ``Docx`` but only enqueues the
    ``process_file`` Celery task when the row was just created. Text
    extraction and the request to the AI service are done by that task,
    so nothing else happens in the signal handler.
    """
    if not created:
        return
    # ``args`` must be a tuple or list; ``(instance.pk)`` without the
    # trailing comma is just ``instance.pk`` and is rejected by Celery.
    process_file.apply_async((instance.pk,))
--- a/checker/tasks.py
+++ b/checker/tasks.py
@ -1,10 +1,32 @@
 from time import sleep
 import docx2txt
 import requests
 from celery import shared_task
 from uuid import uuid4
-from checker.models import Docx
+from django.conf import settings
 from checker.models import Paragraph, Docx
 from checker.services.file import process_paragraphs
@shared_task()
def process_file(pk):
    """Extract, classify and store the paragraphs of one uploaded Docx.

    The document text is split into paragraphs, the paragraphs are posted
    to the AI service, and one ``Paragraph`` row is created per entry of
    the service's answer. ``paragraphs_loaded`` is saved before the request
    and ``paragraphs_processed`` after all rows are created.

    Args:
        pk: Primary key (UUID) of the ``Docx`` row to process.

    Returns:
        The processed ``Docx`` instance.

    Raises:
        Docx.DoesNotExist: If no ``Docx`` has primary key ``pk``.
        requests.HTTPError: If the AI service answers with an error status.
    """
    file = Docx.objects.get(pk=pk)
    document = docx2txt.process(file.file.path)
    paragraphs = process_paragraphs(document.split("\n"))
    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    # Use the configured endpoint when there is one; the previously
    # hard-coded address stays as the fallback.
    ai_url = getattr(settings, "AI_URL", "http://185.244.175.164:5000/api")
    response = requests.post(ai_url, json=paragraphs)
    # Fail loudly on an error status instead of parsing an error body.
    response.raise_for_status()

    # JSON object keys always come back as strings, whatever type the keys
    # of ``paragraphs`` had when sent, so look texts up by string key.
    text_by_key = {str(key): text for key, text in paragraphs.items()}
    for el_id, type_id in response.json().items():
        Paragraph.objects.create(
            type_id=type_id, docx=file, text=text_by_key[str(el_id)]
        )

    file.paragraphs_processed = len(paragraphs)
    file.save(update_fields=["paragraphs_processed"])
    # NOTE(review): a model instance is not JSON-serializable; if a result
    # backend is enabled with the JSON result serializer, storing this
    # return value will presumably fail - confirm and return the pk instead.
    return file
--- a/conf/api_router.py
+++ b/conf/api_router.py
@ -1,8 +1,10 @@
 from django.urls import path, include
-from checker.api.views import ListCreateDocxApiView
+from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState
 # API routes: health checks, Docx list/create, and two per-document
 # lookups keyed by the document's UUID.
 urlpatterns = [
    path("health/", include("health_check.urls")),
    # The trailing comma matters: without it the next entry is a syntax error.
-    path("docx/", ListCreateDocxApiView.as_view(), name="list_create_docx")
+    path("docx/", ListCreateDocxApiView.as_view(), name="list_create_docx"),
    # Paragraph texts of one document, grouped by paragraph type.
    path("docx/<uuid:uuid>", RetireDocxSerializer.as_view(), name="get_docx"),
    # Paragraph counters (loaded / processed) of one document.
    path("state/<uuid:uuid>", GetDocxState.as_view(), name="get_state_docx"),
 ]
--- a/conf/celery.py
+++ b/conf/celery.py
@ -5,7 +5,7 @@ from celery import Celery
 # Set the default Django settings module for the 'celery' program.
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "conf.settings.local")
-app = Celery("mistake_checker_hack_backend")
+app = Celery("conf")
 # Using a string here means the worker doesn't have to serialize
 # the configuration object to child processes.
--- a/conf/settings/base.py
+++ b/conf/settings/base.py
@ -203,3 +203,6 @@ CELERY_BROKER_URL = 'redis://localhost:6379/0'
 CELERY_TIMEZONE = "Europe/Moscow"
 CELERY_TASK_TRACK_STARTED = True
 CELERY_TASK_TIME_LIMIT = 30 * 60
 CELERY_ACCEPT_CONTENT = ['json']
 CELERY_TASK_SERIALIZER = 'json'
 CELERY_RESULT_SERIALIZER = 'json'
--- a/requirements/base.txt
+++ b/requirements/base.txt
@ -15,3 +15,4 @@ psutil
 dj-database-url
 uuid
 docx2txt
 requests-async