mirror of
https://github.com/Ai-hack-MAGNUM-OPUS/backend.git
synced 2024-11-22 00:06:34 +03:00
added async task worker, state save
This commit is contained in:
parent
0c1d881dff
commit
477afd4278
|
@ -8,3 +8,9 @@ class DocxSerializer(serializers.ModelSerializer):
|
|||
model = Docx
|
||||
fields = ["uuid", "file"]
|
||||
extra_kwargs = {"uuid": {"read_only": True}}
|
||||
|
||||
|
||||
class DocxStateSerializer(serializers.ModelSerializer):
    """Serialize only the processing counters of a ``Docx``.

    The output contains ``paragraphs_loaded`` and ``paragraphs_processed``
    and nothing else from the model.
    """

    class Meta:
        model = Docx
        fields = [
            "paragraphs_loaded",
            "paragraphs_processed",
        ]
|
||||
|
|
|
@ -1,11 +1,31 @@
|
|||
from rest_framework import generics
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.views import APIView
|
||||
from rest_framework.generics import get_object_or_404
|
||||
from rest_framework.parsers import MultiPartParser, FormParser
|
||||
|
||||
from checker.api.serializers import DocxSerializer
|
||||
from checker.models import Docx
|
||||
from checker.api.serializers import DocxSerializer, DocxStateSerializer
|
||||
from checker.models import Docx, ParagraphType
|
||||
|
||||
|
||||
class ListCreateDocxApiView(generics.ListCreateAPIView):
    """List every ``Docx`` and accept new uploads.

    Request bodies are parsed with the form and multipart parsers, and
    records are rendered through ``DocxSerializer``.
    """

    queryset = Docx.objects.all()
    serializer_class = DocxSerializer
    parser_classes = [FormParser, MultiPartParser]
|
||||
|
||||
|
||||
class GetDocxState(generics.RetrieveAPIView):
    """Retrieve the processing counters of a single ``Docx``.

    The document is looked up by its ``uuid`` and rendered through
    ``DocxStateSerializer``.
    """

    serializer_class = DocxStateSerializer
    queryset = Docx.objects.all()
    lookup_field = "uuid"
|
||||
|
||||
|
||||
class RetireDocxSerializer(APIView):
    """Return the stored paragraphs of one document, grouped by type name."""

    def get(self, request, uuid):
        """Build a ``{type name: [paragraph text, ...]}`` mapping for a document.

        Args:
            request: The incoming DRF request (unused).
            uuid: Identifier of the ``Docx`` taken from the URL.

        Returns:
            A ``Response`` whose body maps each ``ParagraphType.name`` that has
            paragraphs in this document to the list of those paragraphs' texts.
            Responds with 404 when no ``Docx`` has the given ``uuid``.
        """
        doc = get_object_or_404(Docx, uuid=uuid)
        # Filtering across the multi-valued ``paragraphs`` relation joins one
        # row per matching paragraph.  Without ``distinct()`` the same type is
        # yielded once per paragraph and its texts are re-queried every time.
        paragraph_types = ParagraphType.objects.filter(
            paragraphs__docx=doc
        ).distinct()
        return Response(
            {
                p_type.name: [
                    paragraph.text
                    for paragraph in p_type.paragraphs.filter(docx=doc)
                ]
                for p_type in paragraph_types
            }
        )
|
||||
|
||||
|
|
|
@ -2,16 +2,24 @@ import uuid as uuid
|
|||
from django.db import models
|
||||
|
||||
# Create your models here.
|
||||
from checker.services.file import media_upload_path
|
||||
|
||||
|
||||
class Docx(models.Model):
    """An uploaded document together with its paragraph-processing counters."""

    uuid = models.UUIDField(
        default=uuid.uuid4, editable=False, unique=True, primary_key=True
    )
    # Single definition of the upload field: the storage path is produced by
    # ``media_upload_path`` (the earlier ``upload_to=""`` declaration was
    # overridden by this one anyway).
    file = models.FileField(upload_to=media_upload_path)
    created = models.DateTimeField(auto_now_add=True)

    # Progress counters; both start at zero.
    paragraphs_processed = models.IntegerField(default=0)
    paragraphs_loaded = models.IntegerField(default=0)

    def __str__(self):
        """Return the document's UUID as text."""
        # ``__str__`` must return a ``str``; returning the ``UUID`` object
        # itself makes ``str(obj)`` raise ``TypeError``.
        return str(self.uuid)

    class Meta:
        # Newest uploads first.
        ordering = ["-created"]
|
||||
|
||||
|
||||
class ParagraphType(models.Model):
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
import os
|
||||
|
||||
from checker.services.generators import generate_charset
|
||||
|
||||
|
||||
def process_paragraphs(text):
|
||||
paragraphs = {}
|
||||
c = 0
|
||||
|
@ -12,4 +17,8 @@ def process_paragraphs(text):
|
|||
print()
|
||||
if c:
|
||||
paragraphs[c] += line
|
||||
return paragraphs
|
||||
return paragraphs
|
||||
|
||||
|
||||
def media_upload_path(instance, filename):
    """Build the storage path for an uploaded file.

    The file is placed under ``uploads/`` inside a directory named by a
    freshly generated 7-character string; ``instance`` is not used.
    """
    directory = f"uploads/{generate_charset(7)}/"
    return os.path.join(directory, filename)
|
||||
|
|
7
checker/services/generators.py
Normal file
7
checker/services/generators.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
import random
|
||||
import string
|
||||
|
||||
|
||||
def generate_charset(length: int) -> str:
    """Return ``length`` randomly chosen ASCII letters joined into one string."""
    alphabet = string.ascii_letters
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)
|
|
@ -1,19 +1,18 @@
|
|||
from pprint import pprint
|
||||
import requests
|
||||
import asyncio
|
||||
|
||||
import docx2txt
|
||||
from django.conf import settings
|
||||
from django.db.models.signals import post_save
|
||||
from django.dispatch import receiver
|
||||
|
||||
from checker.models import Docx
|
||||
from checker.models import Docx, Paragraph
|
||||
from checker.services.file import process_paragraphs
|
||||
from checker.tasks import process_file
|
||||
import threading
|
||||
import asyncio
|
||||
|
||||
|
||||
@receiver(post_save, sender=Docx)
def create_docs(sender, instance, created, **kwargs):
    """Queue background processing for a newly created ``Docx``.

    Runs on every ``post_save`` of ``Docx``; only the initial creation
    schedules work. Text extraction and the call to the classification
    service happen in the ``process_file`` task, not in this handler.

    Args:
        sender: The model class that sent the signal.
        instance: The ``Docx`` that was saved.
        created: ``True`` when the save inserted a new row.
        **kwargs: Remaining signal arguments (unused).
    """
    if not created:
        return
    # ``(instance.pk)`` is only a parenthesised value, not a tuple, so the
    # task would receive a bare UUID where a sequence of positional arguments
    # is required.  The key is sent as text so it passes through the JSON
    # task serializer unchanged.
    process_file.apply_async(args=(str(instance.pk),))
|
||||
|
|
|
@ -1,10 +1,32 @@
|
|||
from time import sleep
|
||||
|
||||
import docx2txt
|
||||
import requests
|
||||
from celery import shared_task
|
||||
from uuid import uuid4
|
||||
|
||||
from checker.models import Docx
|
||||
from django.conf import settings
|
||||
|
||||
from checker.models import Paragraph, Docx
|
||||
from checker.services.file import process_paragraphs
|
||||
|
||||
|
||||
@shared_task(name="process_file")
|
||||
def process_file(file: uuid4):
|
||||
print(file)
|
||||
@shared_task()
def process_file(pk: str):
    """Extract, classify and store the paragraphs of one uploaded document.

    Steps: read the ``.docx`` text, split it into paragraphs, record how many
    were loaded, send them to the classification service, create one
    ``Paragraph`` per classified entry, then record the processed count.

    Args:
        pk: Primary key (UUID) of the ``Docx`` to process.

    Returns:
        The primary key of the processed document as a string.

    Raises:
        Docx.DoesNotExist: If no document has this primary key.
        requests.RequestException: If the classification service cannot be
            reached, times out, or answers with an HTTP error status.
    """
    file = Docx.objects.get(pk=pk)
    document = docx2txt.process(file.file.path)
    paragraphs = process_paragraphs(document.split("\n"))

    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    # Prefer the configured endpoint; the literal is kept only as a fallback
    # so deployments without ``AI_URL`` behave as before.
    ai_url = getattr(settings, "AI_URL", "http://185.244.175.164:5000/api")
    # Without a timeout a stalled service would pin the worker until Celery's
    # hard time limit kills it.
    response = requests.post(ai_url, json=paragraphs, timeout=(10, 600))
    response.raise_for_status()

    for el_id, type_id in response.json().items():
        # JSON object keys are always strings, so ids echoed back by the
        # service may not match integer keys in ``paragraphs``.
        # NOTE(review): assumes the service echoes the ids it was sent - confirm.
        key = el_id if el_id in paragraphs else int(el_id)
        Paragraph.objects.create(type_id=type_id, docx=file, text=paragraphs[key])

    file.paragraphs_processed = len(paragraphs)
    file.save(update_fields=["paragraphs_processed"])

    # A model instance cannot be encoded by the JSON result serializer;
    # return its key instead.
    return str(file.pk)
|
||||
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
from django.urls import path, include
|
||||
|
||||
from checker.api.views import ListCreateDocxApiView
|
||||
from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState
|
||||
|
||||
# Route table: service health checks, document upload/listing, classified
# paragraphs of one document, and the processing counters of one document.
urlpatterns = [
    path("health/", include("health_check.urls")),
    # Single "docx/" entry: the duplicate without a trailing comma made the
    # list literal invalid.
    path("docx/", ListCreateDocxApiView.as_view(), name="list_create_docx"),
    path("docx/<uuid:uuid>", RetireDocxSerializer.as_view(), name="get_docx"),
    path("state/<uuid:uuid>", GetDocxState.as_view(), name="get_state_docx"),
]
|
||||
|
|
|
@ -5,7 +5,7 @@ from celery import Celery
|
|||
# Set the default Django settings module for the 'celery' program.
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "conf.settings.local")
|
||||
|
||||
app = Celery("mistake_checker_hack_backend")
|
||||
app = Celery("conf")
|
||||
|
||||
# Using a string here means the worker doesn't have to serialize
|
||||
# the configuration object to child processes.
|
||||
|
|
|
@ -203,3 +203,6 @@ CELERY_BROKER_URL = 'redis://localhost:6379/0'
|
|||
CELERY_TIMEZONE = "Europe/Moscow"
|
||||
CELERY_TASK_TRACK_STARTED = True
|
||||
CELERY_TASK_TIME_LIMIT = 30 * 60
|
||||
CELERY_ACCEPT_CONTENT = ['json']
|
||||
CELERY_TASK_SERIALIZER = 'json'
|
||||
CELERY_RESULT_SERIALIZER = 'json'
|
||||
|
|
|
@ -14,4 +14,5 @@ django_celery_results==2.4.0
|
|||
psutil
|
||||
dj-database-url
|
||||
uuid
|
||||
docx2txt
|
||||
docx2txt
|
||||
requests-async
|
Loading…
Reference in New Issue
Block a user