2022-08-27 07:38:54 +03:00
|
|
|
from time import sleep
|
|
|
|
|
|
|
|
import docx2txt
|
|
|
|
import requests
|
2022-08-26 20:04:45 +03:00
|
|
|
from celery import shared_task
|
|
|
|
|
2022-08-27 07:38:54 +03:00
|
|
|
from django.conf import settings
|
|
|
|
|
|
|
|
from checker.models import Paragraph, Docx
|
|
|
|
from checker.services.file import process_paragraphs
|
|
|
|
|
|
|
|
|
|
|
|
@shared_task()
def process_file(pk: int):
    """Extract paragraphs from an uploaded .docx and store their classification.

    Steps:
      1. Load the ``Docx`` row with primary key ``pk`` and extract its text
         with ``docx2txt``.
      2. Split the text on newlines and pass the lines through
         ``process_paragraphs``; record the result size in
         ``paragraphs_loaded``.
      3. POST the paragraphs as JSON to the classification API and create one
         ``Paragraph`` row per ``(element id, type id)`` pair in the response.
      4. Record ``len(paragraphs)`` in ``paragraphs_processed``.

    Args:
        pk: Primary key of the ``Docx`` record to process.

    Returns:
        The ``Docx`` instance that was processed.

    Raises:
        requests.HTTPError: If the classification API answers with a 4xx/5xx
            status.
        requests.Timeout: If the API does not connect or respond in time.
    """
    file = Docx.objects.get(pk=pk)

    document = docx2txt.process(file.file.path)
    paragraphs = process_paragraphs(document.split("\n"))

    # Persist the loaded count before the (potentially slow) remote call so
    # that the value is already stored while classification is in progress.
    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    # NOTE(review): the endpoint is hard-coded; consider moving it to settings.
    # Without a timeout `requests` waits forever, which would block the worker
    # if the API stalls. (connect, read) in seconds; the read limit is kept
    # generous because the API handles the whole document in a single call.
    response = requests.post(
        "http://185.244.175.164:5000/api",
        json=paragraphs,
        timeout=(10, 300),
    )
    # Fail loudly on an error status instead of trying to parse an error body.
    response.raise_for_status()

    # NOTE(review): JSON object keys are always strings, so `paragraphs` is
    # presumably a mapping keyed by the same string ids -- confirm against
    # `process_paragraphs`.
    for el_id, type_id in response.json().items():
        Paragraph.objects.create(
            type_id=type_id, docx=file, text=paragraphs[el_id]
        )

    file.paragraphs_processed = len(paragraphs)
    file.save(update_fields=["paragraphs_processed"])

    # NOTE(review): a model instance is returned from the task; confirm the
    # configured Celery result serializer can handle it (or that results are
    # ignored).
    return file
|
2022-08-27 07:38:54 +03:00
|
|
|
|