implemented word endpoints

2025-11-08 19:47:34 +03:00 · 2022-08-27 11:59:23 +03:00 · 2022-08-27 11:59:23 +03:00 · 1d859084d1
commit 1d859084d1
parent cdf082415c
7 changed files with 148 additions and 14 deletions
--- a/checker/api/serializers.py
+++ b/checker/api/serializers.py
@ -1,6 +1,6 @@
 from rest_framework import serializers
-from checker.models import Docx
+from checker.models import Docx, WordDocx
 class DocxSerializer(serializers.ModelSerializer):
@ -14,3 +14,22 @@ class DocxStateSerializer(serializers.ModelSerializer):
    class Meta:
        model = Docx
        fields = ["paragraphs_loaded", "paragraphs_processed"]
 class WordDocxSerializer(serializers.ModelSerializer):
    """Accept document text on input and expose the document UUID on output.

    ``text`` is input-only: it is encoded to bytes before being stored in the
    model's binary field and is never rendered back in a response.
    """

    # An explicitly declared field ignores Meta.extra_kwargs, so the
    # write-only flag must be set on the field itself. Without it the stored
    # bytes would be stringified into every response.
    text = serializers.CharField(write_only=True)

    class Meta:
        model = WordDocx
        fields = ["text", "uuid"]
        extra_kwargs = {"uuid": {"read_only": True}}

    def validate_text(self, val):
        """Return the submitted text encoded to bytes for storage."""
        return str(val).encode()
 class WordDocxStateSerializer(serializers.ModelSerializer):
    """Serialize the two paragraph progress counters of a WordDocx."""

    class Meta:
        fields = ("paragraphs_loaded", "paragraphs_processed")
        model = WordDocx
--- a/checker/api/views.py
+++ b/checker/api/views.py
@ -4,8 +4,13 @@ from rest_framework.views import APIView
 from rest_framework.generics import get_object_or_404
 from rest_framework.parsers import MultiPartParser, FormParser
-from checker.api.serializers import DocxSerializer, DocxStateSerializer
+from checker.api.serializers import (
-from checker.models import Docx, ParagraphType
+    DocxSerializer,
    DocxStateSerializer,
    WordDocxSerializer,
    WordDocxStateSerializer,
 )
 from checker.models import Docx, ParagraphType, WordDocx
 class ListCreateDocxApiView(generics.ListCreateAPIView):
@ -29,3 +34,25 @@ class RetireDocxSerializer(APIView):
            res[p.name] = [(x.text, x.score) for x in p.paragraphs.filter(docx=doc)]
        return Response(res)
 class ListCreateWordDocxApiView(generics.ListCreateAPIView):
    """List WordDocx records and create new ones from form-encoded payloads."""

    queryset = WordDocx.objects.all()
    serializer_class = WordDocxSerializer
    # Parser order is kept as-is: plain form data first, then multipart.
    parser_classes = (FormParser, MultiPartParser)
 class GetWordDocxState(generics.RetrieveAPIView):
    """Return the progress counters of a single WordDocx looked up by UUID."""

    serializer_class = WordDocxStateSerializer
    queryset = WordDocx.objects.all()
    lookup_field = "uuid"
 class RetireWordDocxSerializer(APIView):
    """Return the scored paragraphs of one WordDocx grouped by paragraph type."""

    # TODO create base class
    def get(self, request, uuid):
        """Respond with ``{type name: [(text, score), ...]}`` for the document.

        Responds with 404 when no WordDocx has the given ``uuid``. Every
        paragraph type is present in the result, even when its list is empty.
        """
        document = get_object_or_404(WordDocx, uuid=uuid)
        grouped = {
            kind.name: [
                (item.text, item.score)
                for item in kind.word_paragraphs.filter(docx=document)
            ]
            for kind in ParagraphType.objects.all()
        }
        return Response(grouped)
--- a/checker/models.py
+++ b/checker/models.py
@ -24,6 +24,23 @@ class Docx(models.Model):
        ordering = ["-created"]
 class WordDocx(models.Model):
    """Document text stored as binary data, with paragraph progress counters."""

    # The default is evaluated before this attribute shadows the ``uuid``
    # module inside the class body.
    uuid = models.UUIDField(
        primary_key=True, unique=True, editable=False, default=uuid.uuid4
    )
    text = models.BinaryField()
    created = models.DateTimeField(auto_now_add=True)
    paragraphs_processed = models.IntegerField(default=0)
    paragraphs_loaded = models.IntegerField(default=0)

    class Meta:
        # Newest documents first.
        ordering = ["-created"]

    def __str__(self):
        return f"{self.uuid}"
 class ParagraphType(models.Model):
    name = models.CharField(max_length=200)
@ -32,9 +49,24 @@ class ParagraphType(models.Model):
 class Paragraph(models.Model):
-    score = models.IntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(100)])
+    score = models.IntegerField(
        default=0, validators=[MinValueValidator(0), MaxValueValidator(100)]
    )
    text = models.TextField()
    type = models.ForeignKey(
        ParagraphType, related_name="paragraphs", on_delete=models.CASCADE
    )
    docx = models.ForeignKey(Docx, related_name="paragraphs", on_delete=models.CASCADE)
 class WordParagraph(models.Model):
    """A single paragraph of a WordDocx with its type and a 0-100 score."""

    text = models.TextField()
    type = models.ForeignKey(
        ParagraphType, on_delete=models.CASCADE, related_name="word_paragraphs"
    )
    # Validators bound the score to the inclusive range 0..100.
    score = models.IntegerField(
        validators=[MinValueValidator(0), MaxValueValidator(100)], default=0
    )
    docx = models.ForeignKey(
        WordDocx, on_delete=models.CASCADE, related_name="paragraphs"
    )
--- a/checker/services/file.py
+++ b/checker/services/file.py
@ -3,8 +3,7 @@ import os
 from checker.services.generators import generate_charset
-def process_paragraphs(text):
+def _base_process(text):
    text = text.split("\n")
    paragraphs = {}
    c = 1
    title = True
@ -19,5 +18,16 @@ def process_paragraphs(text):
    return paragraphs
 def process_paragraphs(text):
    """Split ``text`` on newlines and pass the lines to ``_base_process``."""
    lines = text.split("\n")
    return _base_process(lines)
 def process_word_paragraphs(text):
    """Split ``text`` on its separator and pass the pieces to ``_base_process``."""
    # NOTE(review): the separator is a literal backslash followed by "r", not
    # a carriage-return character - confirm this matches what clients send.
    pieces = text.split("\\r")
    print(pieces)
    return _base_process(pieces)
 def media_upload_path(instance, filename):
    """Build an upload path under ``uploads/<generated charset>/``.

    ``instance`` is accepted but not used.
    """
    folder = f"uploads/{generate_charset(7)}/"
    return os.path.join(folder, filename)
--- a/checker/signals.py
+++ b/checker/signals.py
@ -1,8 +1,8 @@
 from django.db.models.signals import post_save
 from django.dispatch import receiver
-from checker.models import Docx
+from checker.models import Docx, WordDocx
-from checker.tasks import process_file
+from checker.tasks import process_file, process_word
@receiver(post_save, sender=Docx)
@ -10,3 +10,11 @@ def create_docs(sender, instance, created, **kwargs):
    if created:
        process_file.apply_async(kwargs={"pk": instance.pk})
        return
@receiver(post_save, sender=WordDocx)
def create_word_docs(sender, instance, created, **kwargs):
    """Queue background processing when a new WordDocx row is created.

    Saves of existing rows (``created`` is false) are ignored.
    """
    # This handler needs its own name. Signal receivers are connected through
    # weak references by default, so reusing the name of the Docx handler in
    # this module would rebind it, let the earlier function be garbage
    # collected, and silently disconnect Docx processing.
    if created:
        process_word.apply_async(kwargs={"pk": instance.pk})
--- a/checker/tasks.py
+++ b/checker/tasks.py
@ -2,8 +2,8 @@ import docx2txt
 import requests
 from celery import shared_task
-from checker.models import Paragraph, Docx
+from checker.models import Paragraph, Docx, WordDocx, WordParagraph
-from checker.services.file import process_paragraphs
+from checker.services.file import process_paragraphs, process_word_paragraphs
@shared_task()
@ -40,3 +40,40 @@ def process_file(pk: int):
            print(f"AI server error, {x.status_code}")
    return f"ok, {pk}"
@shared_task()
def process_word(pk: int):
    """Split a WordDocx into paragraphs, score them remotely, store the results.

    The paragraphs are posted to the scoring service in batches of ``cut``
    items. Each successful batch creates one WordParagraph per returned item
    and advances ``paragraphs_processed``; a non-200 response is reported and
    that batch is skipped.

    Args:
        pk: Primary key of the WordDocx to process. The model uses a UUID
            primary key, so in practice this is a UUID despite the annotation.

    Returns:
        The string ``"ok, <pk>"``.
    """
    file = WordDocx.objects.get(pk=pk)
    uuid = file.uuid
    # bytes() accepts both bytes and memoryview, so this works whichever of
    # the two the database backend hands back for a binary field.
    paragraphs = process_word_paragraphs(bytes(file.text).decode())
    print(paragraphs)

    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    cut = 100
    counter = 0
    texts = list(paragraphs.values())
    total = len(texts)
    # Step by exactly ``cut`` so batches never overlap (an overlapping slice
    # would score and store boundary paragraphs twice) and no empty batch is
    # posted when ``total`` is a multiple of ``cut`` or zero.
    for start in range(0, total, cut):
        vals = texts[start : start + cut]
        dct = dict(enumerate(vals))
        response = requests.post("http://109.248.175.223:5000/api", json=dct)
        if response.status_code != 200:
            print(f"AI server error, {response.status_code}")
            continue

        # JSON object keys come back as strings; map them to batch indices.
        for el_id, dat in response.json().items():
            type_id, score = dat
            WordParagraph.objects.create(
                type_id=type_id, docx=file, text=dct[int(el_id)], score=score
            )
        counter += len(vals)
        print(f"processing {uuid}, {counter}/{total}")
        file.paragraphs_processed = counter
        file.save(update_fields=["paragraphs_processed"])
    return f"ok, {pk}"
--- a/conf/api_router.py
+++ b/conf/api_router.py
@ -1,6 +1,7 @@
 from django.urls import path, include
-from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState
+from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState, ListCreateWordDocxApiView, \
    GetWordDocxState, RetireWordDocxSerializer
 urlpatterns = [
    path("health/", include("health_check.urls")),
@ -22,12 +23,12 @@ urlpatterns = [
        "word/",
        include(
            [
-                path("docx/", ListCreateDocxApiView.as_view(), name="list_create_word"),
+                path("docx/", ListCreateWordDocxApiView.as_view(), name="list_create_word"),
                path(
-                    "docx/<uuid:uuid>", RetireDocxSerializer.as_view(), name="get_word"
+                    "docx/<uuid:uuid>", GetWordDocxState.as_view(), name="get_word"
                ),
                path(
-                    "state/<uuid:uuid>", GetDocxState.as_view(), name="get_state_word"
+                    "state/<uuid:uuid>", RetireWordDocxSerializer.as_view(), name="get_state_word"
                ),
            ]
        ),