Mirror of https://github.com/Ai-hack-MAGNUM-OPUS/backend.git (synced 2024-11-22 00:06:34 +03:00)

Commit 477afd4278 (parent 0c1d881dff): "added async task worker, state save"
checker/api/serializers.py
@@ -8,3 +8,9 @@ class DocxSerializer(serializers.ModelSerializer):
         model = Docx
         fields = ["uuid", "file"]
         extra_kwargs = {"uuid": {"read_only": True}}
+
+
+class DocxStateSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Docx
+        fields = ["paragraphs_loaded", "paragraphs_processed"]
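
For orientation, a minimal sketch (not part of the commit) of the payload the new DocxStateSerializer produces for a Docx row, for example in a "python manage.py shell" session; the counts shown are hypothetical:

# Sketch only: serialize an existing Docx into the new state payload.
# Assumes the project is configured and at least one Docx has been uploaded.
from checker.api.serializers import DocxStateSerializer
from checker.models import Docx

docx = Docx.objects.order_by("-created").first()
print(DocxStateSerializer(docx).data)
# e.g. {'paragraphs_loaded': 40, 'paragraphs_processed': 12}  (made-up values)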
checker/api/views.py
@@ -1,11 +1,31 @@
 from rest_framework import generics
+from rest_framework.response import Response
+from rest_framework.views import APIView
+from rest_framework.generics import get_object_or_404
 from rest_framework.parsers import MultiPartParser, FormParser
 
-from checker.api.serializers import DocxSerializer
-from checker.models import Docx
+from checker.api.serializers import DocxSerializer, DocxStateSerializer
+from checker.models import Docx, ParagraphType
 
 
 class ListCreateDocxApiView(generics.ListCreateAPIView):
     parser_classes = [FormParser, MultiPartParser]
     serializer_class = DocxSerializer
     queryset = Docx.objects.all()
+
+
+class GetDocxState(generics.RetrieveAPIView):
+    lookup_field = "uuid"
+    queryset = Docx.objects.all()
+    serializer_class = DocxStateSerializer
+
+
+class RetireDocxSerializer(APIView):
+    def get(self, request, uuid):
+        doc = get_object_or_404(Docx, uuid=uuid)
+        res = {}
+        paragraphs = ParagraphType.objects.filter(paragraphs__docx=doc)
+        for p in paragraphs:
+            res[p.name] = [x.text for x in p.paragraphs.filter(docx=doc)]
+        return Response(res)
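
The retrieve view (RetireDocxSerializer, despite the name an APIView) groups the document's Paragraph rows by their ParagraphType name. A rough illustration of the response it assembles, with hypothetical type names and texts:

# Illustration only: keys are ParagraphType.name values, lists hold the texts
# of that document's paragraphs of the given type (all values are made up).
response_example = {
    "heading": ["1. Introduction", "2. Methods"],
    "body": ["This document describes...", "In the next section..."],
}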
checker/models.py
@@ -2,16 +2,24 @@ import uuid as uuid
 from django.db import models
 
 # Create your models here.
+from checker.services.file import media_upload_path
 
 
 class Docx(models.Model):
     uuid = models.UUIDField(
         default=uuid.uuid4, editable=False, unique=True, primary_key=True
     )
-    file = models.FileField(upload_to="")
+    file = models.FileField(upload_to=media_upload_path)
+    created = models.DateTimeField(auto_now_add=True)
+
+    paragraphs_processed = models.IntegerField(default=0)
+    paragraphs_loaded = models.IntegerField(default=0)
 
     def __str__(self):
-        return self.uuid
+        return str(self.uuid)
+
+    class Meta:
+        ordering = ["-created"]
 
 
 class ParagraphType(models.Model):
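
The two new counters drive the state endpoint: process_file (below) sets paragraphs_loaded once the document has been split and paragraphs_processed after classification. A hypothetical helper, not part of the commit, that turns them into a completion ratio:

# Hypothetical convenience function; name and placement are assumptions.
def docx_progress(docx) -> float:
    # 0.0 until the task has split the file, 1.0 once processing has finished
    if not docx.paragraphs_loaded:
        return 0.0
    return docx.paragraphs_processed / docx.paragraphs_loaded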
checker/services/file.py
@@ -1,3 +1,8 @@
+import os
+
+from checker.services.generators import generate_charset
+
+
 def process_paragraphs(text):
     paragraphs = {}
     c = 0
@@ -13,3 +18,7 @@ def process_paragraphs(text):
         if c:
             paragraphs[c] += line
     return paragraphs
+
+
+def media_upload_path(instance, filename):
+    return os.path.join(f"uploads/{generate_charset(7)}/", filename)
checker/services/generators.py (new file)
@@ -0,0 +1,7 @@
+import random
+import string
+
+
+def generate_charset(length: int) -> str:
+    """Generate a random string of characters of a given length."""
+    return "".join(random.choice(string.ascii_letters) for _ in range(length))
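
A quick usage sketch (not part of the commit) showing how the two helpers combine to namespace uploads; the outputs are random, so the values in the comments are only examples:

# Illustration only: generate_charset picks random ASCII letters, and
# media_upload_path uses it to put each upload into its own random folder.
from checker.services.file import media_upload_path
from checker.services.generators import generate_charset

print(generate_charset(7))                     # e.g. "KbTqWzp"
print(media_upload_path(None, "report.docx"))  # e.g. "uploads/XnPqRst/report.docx"
# media_upload_path ignores its `instance` argument, so None is fine here.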
@@ -1,19 +1,18 @@
-from pprint import pprint
-import requests
+import asyncio
 
 import docx2txt
-from django.conf import settings
 from django.db.models.signals import post_save
 from django.dispatch import receiver
 
-from checker.models import Docx
+from checker.models import Docx, Paragraph
 from checker.services.file import process_paragraphs
+from checker.tasks import process_file
+import threading
+import asyncio
 
 
 @receiver(post_save, sender=Docx)
 def create_docs(sender, instance, created, **kwargs):
     if created:
-        document = docx2txt.process(instance.file.path)
-        paragraphs = process_paragraphs(document.split("\n"))
-        x = requests.post(settings.AI_URL, json=paragraphs)
+        process_file.apply_async((instance.pk))
     return
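
One detail worth noting for the enqueue call: Celery expects positional task arguments as a tuple or list, so the conventional spelling is a one-element tuple with a trailing comma (or simply .delay()). A minimal sketch of that form:

# Sketch of the intended enqueue; args must be a tuple/list for apply_async.
process_file.apply_async(args=(instance.pk,))
# equivalent shorthand:
process_file.delay(instance.pk)
# str(instance.pk) is a common extra safeguard when the primary key is a UUID
# and the JSON task serializer is in use.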
checker/tasks.py
@@ -1,10 +1,32 @@
+from time import sleep
+
+import docx2txt
+import requests
 from celery import shared_task
-from uuid import uuid4
 
-from checker.models import Docx
+from django.conf import settings
+
+from checker.models import Paragraph, Docx
+from checker.services.file import process_paragraphs
 
 
-@shared_task(name="process_file")
-def process_file(file: uuid4):
-    print(file)
+@shared_task()
+def process_file(pk: int):
+    file = Docx.objects.get(pk=pk)
+    document = docx2txt.process(file.file.path)
+    paragraphs = process_paragraphs(document.split("\n"))
+
+    file.paragraphs_loaded = len(paragraphs)
+    file.save(update_fields=["paragraphs_loaded"])
+
+    x = requests.post("http://185.244.175.164:5000/api", json=paragraphs)
+    for el_id, type_id in x.json().items():
+        Paragraph.objects.create(
+            type_id=type_id, docx=file, text=paragraphs[el_id]
+        )
+
+    file.paragraphs_processed = len(paragraphs)
+    file.save(update_fields=["paragraphs_processed"])
+
     return file
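
For quick local debugging the task body can also be run without a broker, since a shared_task object is callable like a plain function. A minimal sketch, assuming an uploaded Docx exists and the external classification service is reachable:

# Sketch only (not part of the commit): run the task synchronously in-process.
from checker.models import Docx
from checker.tasks import process_file

doc = Docx.objects.order_by("-created").first()
processed = process_file(doc.pk)   # executes the task body directly
print(processed.paragraphs_processed)
# In production the post_save receiver enqueues it via apply_async instead.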
@@ -1,8 +1,10 @@
 from django.urls import path, include
 
-from checker.api.views import ListCreateDocxApiView
+from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState
 
 urlpatterns = [
     path("health/", include("health_check.urls")),
-    path("docx/", ListCreateDocxApiView.as_view(), name="list_create_docx")
+    path("docx/", ListCreateDocxApiView.as_view(), name="list_create_docx"),
+    path("docx/<uuid:uuid>", RetireDocxSerializer.as_view(), name="get_docx"),
+    path("state/<uuid:uuid>", GetDocxState.as_view(), name="get_state_docx"),
 ]
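
Putting the routes together, a rough end-to-end sketch with Django's test client; the URL names come from this hunk, while the input file name and the response values are hypothetical, and it assumes the names resolve without a namespace and that the upload has already been processed:

# Sketch of the upload -> poll state -> fetch result flow over the new routes.
from django.test import Client
from django.urls import reverse

client = Client()
with open("sample.docx", "rb") as fh:  # hypothetical input file
    uuid = client.post(reverse("list_create_docx"), {"file": fh}).json()["uuid"]

state = client.get(reverse("get_state_docx", kwargs={"uuid": uuid})).json()
# e.g. {"paragraphs_loaded": 40, "paragraphs_processed": 40}

result = client.get(reverse("get_docx", kwargs={"uuid": uuid})).json()
# e.g. {"<ParagraphType name>": ["paragraph text", ...], ...}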
@@ -5,7 +5,7 @@ from celery import Celery
 # Set the default Django settings module for the 'celery' program.
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "conf.settings.local")
 
-app = Celery("mistake_checker_hack_backend")
+app = Celery("conf")
 
 # Using a string here means the worker doesn't have to serialize
 # the configuration object to child processes.
@@ -203,3 +203,6 @@ CELERY_BROKER_URL = 'redis://localhost:6379/0'
 CELERY_TIMEZONE = "Europe/Moscow"
 CELERY_TASK_TRACK_STARTED = True
 CELERY_TASK_TIME_LIMIT = 30 * 60
+CELERY_ACCEPT_CONTENT = ['json']
+CELERY_TASK_SERIALIZER = 'json'
+CELERY_RESULT_SERIALIZER = 'json'
@@ -15,3 +15,4 @@ psutil
 dj-database-url
 uuid
 docx2txt
+requests-async