mirror of
https://github.com/Ai-hack-MAGNUM-OPUS/backend.git
synced 2024-11-24 01:03:44 +03:00
implemented word endpoints
This commit is contained in:
parent
cdf082415c
commit
1d859084d1
|
@ -1,6 +1,6 @@
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
from checker.models import Docx
|
from checker.models import Docx, WordDocx
|
||||||
|
|
||||||
|
|
||||||
class DocxSerializer(serializers.ModelSerializer):
|
class DocxSerializer(serializers.ModelSerializer):
|
||||||
|
@ -14,3 +14,22 @@ class DocxStateSerializer(serializers.ModelSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Docx
|
model = Docx
|
||||||
fields = ["paragraphs_loaded", "paragraphs_processed"]
|
fields = ["paragraphs_loaded", "paragraphs_processed"]
|
||||||
|
|
||||||
|
|
||||||
|
class WordDocxSerializer(serializers.ModelSerializer):
    """Serializer used to create and list ``WordDocx`` documents.

    Accepts the document body as the ``text`` string on input and exposes
    only the generated ``uuid`` on output.
    """

    # ``write_only`` has to be set on the field itself: DRF ignores
    # ``Meta.extra_kwargs`` entries for explicitly declared fields, and
    # ``Meta.write_only`` is not an option DRF recognises. Without this the
    # stored binary value would be rendered back in every response.
    text = serializers.CharField(write_only=True)

    class Meta:
        model = WordDocx
        fields = ["text", "uuid"]
        extra_kwargs = {"uuid": {"read_only": True}}

    def validate_text(self, val):
        """Return the submitted text encoded to bytes for the binary model field."""
        return str(val).encode()
|
||||||
|
|
||||||
|
|
||||||
|
class WordDocxStateSerializer(serializers.ModelSerializer):
    """Expose the two processing counters of a ``WordDocx``."""

    class Meta:
        model = WordDocx
        fields = [
            "paragraphs_loaded",
            "paragraphs_processed",
        ]
|
||||||
|
|
|
@ -4,8 +4,13 @@ from rest_framework.views import APIView
|
||||||
from rest_framework.generics import get_object_or_404
|
from rest_framework.generics import get_object_or_404
|
||||||
from rest_framework.parsers import MultiPartParser, FormParser
|
from rest_framework.parsers import MultiPartParser, FormParser
|
||||||
|
|
||||||
from checker.api.serializers import DocxSerializer, DocxStateSerializer
|
from checker.api.serializers import (
|
||||||
from checker.models import Docx, ParagraphType
|
DocxSerializer,
|
||||||
|
DocxStateSerializer,
|
||||||
|
WordDocxSerializer,
|
||||||
|
WordDocxStateSerializer,
|
||||||
|
)
|
||||||
|
from checker.models import Docx, ParagraphType, WordDocx
|
||||||
|
|
||||||
|
|
||||||
class ListCreateDocxApiView(generics.ListCreateAPIView):
|
class ListCreateDocxApiView(generics.ListCreateAPIView):
|
||||||
|
@ -29,3 +34,25 @@ class RetireDocxSerializer(APIView):
|
||||||
res[p.name] = [(x.text, x.score) for x in p.paragraphs.filter(docx=doc)]
|
res[p.name] = [(x.text, x.score) for x in p.paragraphs.filter(docx=doc)]
|
||||||
return Response(res)
|
return Response(res)
|
||||||
|
|
||||||
|
|
||||||
|
class ListCreateWordDocxApiView(generics.ListCreateAPIView):
    """List existing ``WordDocx`` rows or create a new one."""

    queryset = WordDocx.objects.all()
    serializer_class = WordDocxSerializer
    # Request bodies are parsed as form-encoded or multipart data.
    parser_classes = [FormParser, MultiPartParser]
|
||||||
|
|
||||||
|
|
||||||
|
class GetWordDocxState(generics.RetrieveAPIView):
    """Retrieve the processing counters of one ``WordDocx`` looked up by ``uuid``."""

    serializer_class = WordDocxStateSerializer
    queryset = WordDocx.objects.all()
    lookup_field = "uuid"
|
||||||
|
|
||||||
|
|
||||||
|
class RetireWordDocxSerializer(APIView):
    """Return the scored paragraphs of one ``WordDocx`` grouped by paragraph type."""

    # TODO create base class
    def get(self, request, uuid):
        """Build ``{type name: [(text, score), ...]}`` for the given document.

        Responds with 404 when no ``WordDocx`` has the given ``uuid``.
        """
        document = get_object_or_404(WordDocx, uuid=uuid)
        # One entry per paragraph type, including types with no paragraphs.
        grouped = {
            kind.name: [
                (item.text, item.score)
                for item in kind.word_paragraphs.filter(docx=document)
            ]
            for kind in ParagraphType.objects.all()
        }
        return Response(grouped)
|
||||||
|
|
|
@ -24,6 +24,23 @@ class Docx(models.Model):
|
||||||
ordering = ["-created"]
|
ordering = ["-created"]
|
||||||
|
|
||||||
|
|
||||||
|
class WordDocx(models.Model):
    """A text document stored as raw bytes together with its progress counters."""

    # Randomly generated identifier that doubles as the primary key.
    uuid = models.UUIDField(
        primary_key=True,
        unique=True,
        editable=False,
        default=uuid.uuid4,
    )
    # Document body kept as binary data.
    text = models.BinaryField()
    created = models.DateTimeField(auto_now_add=True)

    # Progress counters; both start at zero.
    paragraphs_processed = models.IntegerField(default=0)
    paragraphs_loaded = models.IntegerField(default=0)

    class Meta:
        # Newest documents first.
        ordering = ["-created"]

    def __str__(self):
        return str(self.uuid)
|
||||||
|
|
||||||
|
|
||||||
class ParagraphType(models.Model):
|
class ParagraphType(models.Model):
|
||||||
name = models.CharField(max_length=200)
|
name = models.CharField(max_length=200)
|
||||||
|
|
||||||
|
@ -32,9 +49,24 @@ class ParagraphType(models.Model):
|
||||||
|
|
||||||
|
|
||||||
class Paragraph(models.Model):
|
class Paragraph(models.Model):
|
||||||
score = models.IntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(100)])
|
score = models.IntegerField(
|
||||||
|
default=0, validators=[MinValueValidator(0), MaxValueValidator(100)]
|
||||||
|
)
|
||||||
text = models.TextField()
|
text = models.TextField()
|
||||||
type = models.ForeignKey(
|
type = models.ForeignKey(
|
||||||
ParagraphType, related_name="paragraphs", on_delete=models.CASCADE
|
ParagraphType, related_name="paragraphs", on_delete=models.CASCADE
|
||||||
)
|
)
|
||||||
docx = models.ForeignKey(Docx, related_name="paragraphs", on_delete=models.CASCADE)
|
docx = models.ForeignKey(Docx, related_name="paragraphs", on_delete=models.CASCADE)
|
||||||
|
|
||||||
|
|
||||||
|
class WordParagraph(models.Model):
    """One scored paragraph that belongs to a ``WordDocx``."""

    text = models.TextField()
    # Reverse accessor on ParagraphType is ``word_paragraphs``.
    type = models.ForeignKey(
        ParagraphType,
        on_delete=models.CASCADE,
        related_name="word_paragraphs",
    )
    # Validators restrict the score to the 0..100 range.
    score = models.IntegerField(
        default=0,
        validators=[MinValueValidator(0), MaxValueValidator(100)],
    )
    # Reverse accessor on WordDocx is ``paragraphs``.
    docx = models.ForeignKey(
        WordDocx,
        on_delete=models.CASCADE,
        related_name="paragraphs",
    )
|
||||||
|
|
|
@ -3,8 +3,7 @@ import os
|
||||||
from checker.services.generators import generate_charset
|
from checker.services.generators import generate_charset
|
||||||
|
|
||||||
|
|
||||||
def process_paragraphs(text):
|
def _base_process(text):
|
||||||
text = text.split("\n")
|
|
||||||
paragraphs = {}
|
paragraphs = {}
|
||||||
c = 1
|
c = 1
|
||||||
title = True
|
title = True
|
||||||
|
@ -19,5 +18,16 @@ def process_paragraphs(text):
|
||||||
return paragraphs
|
return paragraphs
|
||||||
|
|
||||||
|
|
||||||
|
def process_paragraphs(text):
    """Split *text* on newline characters and hand the pieces to ``_base_process``."""
    lines = text.split("\n")
    return _base_process(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def process_word_paragraphs(text):
    """Split Word-submitted *text* into pieces and hand them to ``_base_process``.

    Returns whatever ``_base_process`` returns for the split pieces.
    """
    # NOTE(review): the separator is the two-character sequence backslash + "r",
    # not a carriage-return character; presumably the client sends paragraph
    # breaks in escaped form -- confirm against the caller.
    lines = text.split("\\r")
    return _base_process(lines)
|
||||||
|
|
||||||
|
|
||||||
def media_upload_path(instance, filename):
|
def media_upload_path(instance, filename):
|
||||||
return os.path.join(f"uploads/{generate_charset(7)}/", filename)
|
return os.path.join(f"uploads/{generate_charset(7)}/", filename)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
|
||||||
from checker.models import Docx
|
from checker.models import Docx, WordDocx
|
||||||
from checker.tasks import process_file
|
from checker.tasks import process_file, process_word
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, sender=Docx)
|
@receiver(post_save, sender=Docx)
|
||||||
|
@ -10,3 +10,11 @@ def create_docs(sender, instance, created, **kwargs):
|
||||||
if created:
|
if created:
|
||||||
process_file.apply_async(kwargs={"pk": instance.pk})
|
process_file.apply_async(kwargs={"pk": instance.pk})
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@receiver(post_save, sender=WordDocx)
def create_word_docs(sender, instance, created, **kwargs):
    """Queue background processing for a newly created ``WordDocx``.

    The handler has its own name on purpose: signals keep receivers as weak
    references, so reusing ``create_docs`` here would rebind that module
    attribute, let the ``Docx`` receiver defined above be garbage collected,
    and silently disconnect it.
    """
    if created:
        process_word.apply_async(kwargs={"pk": instance.pk})
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,8 @@ import docx2txt
|
||||||
import requests
|
import requests
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
|
|
||||||
from checker.models import Paragraph, Docx
|
from checker.models import Paragraph, Docx, WordDocx, WordParagraph
|
||||||
from checker.services.file import process_paragraphs
|
from checker.services.file import process_paragraphs, process_word_paragraphs
|
||||||
|
|
||||||
|
|
||||||
@shared_task()
|
@shared_task()
|
||||||
|
@ -40,3 +40,40 @@ def process_file(pk: int):
|
||||||
print(f"AI server error, {x.status_code}")
|
print(f"AI server error, {x.status_code}")
|
||||||
|
|
||||||
return f"ok, {pk}"
|
return f"ok, {pk}"
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task()
def process_word(pk: int):
    """Split a stored ``WordDocx`` into paragraphs and score them in batches.

    The paragraphs are sent to the scoring service in batches of 100. For
    every successful batch a ``WordParagraph`` row is created per returned
    element and ``paragraphs_processed`` is advanced; a failed batch is
    reported and skipped.

    Args:
        pk: primary key of the ``WordDocx`` to process.

    Returns:
        The string ``"ok, <pk>"``.

    Raises:
        WordDocx.DoesNotExist: if no document has the given primary key.
    """
    document = WordDocx.objects.get(pk=pk)
    # bytes() accepts both a memoryview and plain bytes, whichever the
    # database backend hands back for a BinaryField.
    raw_text = bytes(document.text).decode()
    paragraphs = list(process_word_paragraphs(raw_text).values())
    total = len(paragraphs)

    document.paragraphs_loaded = total
    document.save(update_fields=["paragraphs_loaded"])

    batch_size = 100
    processed = 0
    # Step through non-overlapping windows so that no paragraph is sent twice
    # and no empty trailing batch is posted.
    for start in range(0, total, batch_size):
        batch = paragraphs[start : start + batch_size]
        # Keys are batch-local indices; the service echoes them back as strings.
        payload = dict(enumerate(batch))

        # NOTE(review): no timeout is set, so a stalled service blocks the
        # worker indefinitely -- pick a limit that suits the scoring latency.
        response = requests.post("http://109.248.175.223:5000/api", json=payload)
        if response.status_code != 200:
            print(f"AI server error, {response.status_code}")
            continue

        for el_id, (type_id, score) in response.json().items():
            WordParagraph.objects.create(
                type_id=type_id,
                docx=document,
                text=payload[int(el_id)],
                score=score,
            )

        processed += len(batch)
        print(f"processing {document.uuid}, {processed}/{total}")
        document.paragraphs_processed = processed
        document.save(update_fields=["paragraphs_processed"])

    return f"ok, {pk}"
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from django.urls import path, include
|
from django.urls import path, include
|
||||||
|
|
||||||
from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState
|
from checker.api.views import ListCreateDocxApiView, RetireDocxSerializer, GetDocxState, ListCreateWordDocxApiView, \
|
||||||
|
GetWordDocxState, RetireWordDocxSerializer
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path("health/", include("health_check.urls")),
|
path("health/", include("health_check.urls")),
|
||||||
|
@ -22,12 +23,12 @@ urlpatterns = [
|
||||||
"word/",
|
"word/",
|
||||||
include(
|
include(
|
||||||
[
|
[
|
||||||
path("docx/", ListCreateDocxApiView.as_view(), name="list_create_word"),
|
path("docx/", ListCreateWordDocxApiView.as_view(), name="list_create_word"),
|
||||||
path(
|
path(
|
||||||
"docx/<uuid:uuid>", RetireDocxSerializer.as_view(), name="get_word"
|
"docx/<uuid:uuid>", GetWordDocxState.as_view(), name="get_word"
|
||||||
),
|
),
|
||||||
path(
|
path(
|
||||||
"state/<uuid:uuid>", GetDocxState.as_view(), name="get_state_word"
|
"state/<uuid:uuid>", RetireWordDocxSerializer.as_view(), name="get_state_word"
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user