backend/checker/tasks.py

import docx2txt
import requests
from celery import shared_task

from checker.models import Paragraph, Docx, WordDocx, WordParagraph
from checker.services.file import process_paragraphs, process_word_paragraphs


@shared_task()
def process_file(pk: int):
    """Classify the paragraphs of a stored ``Docx`` via the AI server.

    The document text is extracted with ``docx2txt``, split into paragraphs
    by ``process_paragraphs`` and posted to the AI server in batches of up
    to ``cut`` paragraphs. Every classified paragraph is stored as a
    ``Paragraph`` row, and ``paragraphs_processed`` is updated after each
    successful batch.

    Args:
        pk: Primary key of the ``Docx`` record to process.

    Returns:
        The string ``"ok, <pk>"``. It is returned even when some batches
        were rejected by the AI server; those batches are only reported on
        stdout and their paragraphs are not stored.

    Raises:
        Docx.DoesNotExist: If no ``Docx`` with the given ``pk`` exists.
    """
    file = Docx.objects.get(pk=pk)
    uuid = file.uuid
    document = docx2txt.process(file.file.path)
    paragraphs = process_paragraphs(document)

    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    cut = 100
    counter = 0
    paragraphs = list(paragraphs.values())
    len_c = len(paragraphs)
    # Step through the list in strides of `cut` so batches never overlap.
    # The previous slice end of `(i + 1) * cut + 1` sent the paragraph at
    # every batch boundary twice, which created duplicate rows and pushed
    # `paragraphs_processed` above `paragraphs_loaded`. Striding also avoids
    # posting an empty batch when the count is a multiple of `cut`; an empty
    # document therefore issues no request at all.
    for start in range(0, len_c, cut):
        vals = paragraphs[start : start + cut]
        # Keys are batch-local positions; JSON turns them into strings, so
        # the response keys are converted back with int() below.
        dct = dict(enumerate(vals))

        # NOTE(review): no timeout is set, so an unresponsive AI server
        # blocks this worker indefinitely - pick a limit that suits the
        # server's real latency.
        x = requests.post("http://109.248.175.223:5000/api", json=dct)
        if x.status_code == 200:
            for el_id, dat in x.json().items():
                # Each response value unpacks into (type_id, score).
                type_id, score = dat
                Paragraph.objects.create(
                    type_id=type_id, docx=file, text=dct[int(el_id)], score=score
                )

            counter += len(vals)
            print(f"processing {uuid}, {counter}/{len_c}")
            file.paragraphs_processed = counter
            file.save(update_fields=["paragraphs_processed"])
        else:
            # Best effort: report the failed batch and continue with the next.
            print(f"AI server error, {x.status_code}")

    return f"ok, {pk}"


@shared_task()
def process_word(pk: int):
    """Classify the paragraphs of a stored ``WordDocx`` via the AI server.

    The record's ``text`` buffer is decoded, split into paragraphs by
    ``process_word_paragraphs`` and posted to the AI server in batches of
    up to ``cut`` paragraphs. Every classified paragraph is stored as a
    ``WordParagraph`` row, and ``paragraphs_processed`` is updated after
    each successful batch.

    Args:
        pk: Primary key of the ``WordDocx`` record to process.

    Returns:
        The string ``"ok, <pk>"``. It is returned even when some batches
        were rejected by the AI server; those batches are only reported on
        stdout and their paragraphs are not stored.

    Raises:
        WordDocx.DoesNotExist: If no ``WordDocx`` with the given ``pk``
            exists.
    """
    file = WordDocx.objects.get(pk=pk)
    uuid = file.uuid
    # `text` is converted with tobytes() and decoded with the default codec
    # (UTF-8) before parsing.
    paragraphs = process_word_paragraphs(file.text.tobytes().decode())

    file.paragraphs_loaded = len(paragraphs)
    file.save(update_fields=["paragraphs_loaded"])

    cut = 100
    counter = 0
    paragraphs = list(paragraphs.values())
    len_c = len(paragraphs)
    # Step through the list in strides of `cut` so batches never overlap.
    # The previous slice end of `(i + 1) * cut + 1` sent the paragraph at
    # every batch boundary twice, which created duplicate rows and pushed
    # `paragraphs_processed` above `paragraphs_loaded`. Striding also avoids
    # posting an empty batch when the count is a multiple of `cut`; an empty
    # document therefore issues no request at all.
    for start in range(0, len_c, cut):
        vals = paragraphs[start : start + cut]
        # Keys are batch-local positions; JSON turns them into strings, so
        # the response keys are converted back with int() below.
        dct = dict(enumerate(vals))

        # NOTE(review): no timeout is set, so an unresponsive AI server
        # blocks this worker indefinitely - pick a limit that suits the
        # server's real latency.
        x = requests.post("http://109.248.175.223:5000/api", json=dct)
        if x.status_code == 200:
            for el_id, dat in x.json().items():
                # Each response value unpacks into (type_id, score).
                type_id, score = dat
                WordParagraph.objects.create(
                    type_id=type_id, docx=file, text=dct[int(el_id)], score=score
                )

            counter += len(vals)
            print(f"processing {uuid}, {counter}/{len_c}")
            file.paragraphs_processed = counter
            file.save(update_fields=["paragraphs_processed"])
        else:
            # Best effort: report the failed batch and continue with the next.
            print(f"AI server error, {x.status_code}")

    return f"ok, {pk}"
added async task worker, state save 2022-08-27 07:38:54 +03:00			`import docx2txt`
			`import requests`
added docx endpoints, inited celery 2022-08-26 20:04:45 +03:00			`from celery import shared_task`

implemented word endpoints 2022-08-27 11:59:23 +03:00			`from checker.models import Paragraph, Docx, WordDocx, WordParagraph`
			`from checker.services.file import process_paragraphs, process_word_paragraphs`
added async task worker, state save 2022-08-27 07:38:54 +03:00

			`@shared_task()`
			`def process_file(pk: int):`
			`file = Docx.objects.get(pk=pk)`
fixed parser, added file processing state 2022-08-27 10:16:21 +03:00			`uuid = file.uuid`
added async task worker, state save 2022-08-27 07:38:54 +03:00			`document = docx2txt.process(file.file.path)`
fixed parser, added file processing state 2022-08-27 10:16:21 +03:00			`paragraphs = process_paragraphs(document)`
added docx endpoints, inited celery 2022-08-26 20:04:45 +03:00
added async task worker, state save 2022-08-27 07:38:54 +03:00			`file.paragraphs_loaded = len(paragraphs)`
			`file.save(update_fields=["paragraphs_loaded"])`

fixed parser, added file processing state 2022-08-27 10:16:21 +03:00			`cut = 100`
			`counter = 0`
			`len_c = len(paragraphs)`
			`paragraphs = list(paragraphs.values())`
			`for i in range(0, len(paragraphs) // cut + 1):`
			`vals = paragraphs[i * cut : (i + 1) * cut + 1]`
			`dct = {x: vals[x] for x in range(len(vals))}`
added async task worker, state save 2022-08-27 07:38:54 +03:00
fixed parser, added file processing state 2022-08-27 10:16:21 +03:00			`x = requests.post("http://109.248.175.223:5000/api", json=dct)`
added paragraph score, inited word api 2022-08-27 11:13:36 +03:00			`if x.status_code == 200:`
			`for el_id, dat in x.json().items():`
			`type_id, score = dat`
			`Paragraph.objects.create(`
			`type_id=type_id, docx=file, text=dct[int(el_id)], score=score`
			`)`

			`counter += len(vals)`
			`print(f"processing {uuid}, {counter}/{len_c}")`
			`file.paragraphs_processed = counter`
			`file.save(update_fields=["paragraphs_processed"])`
			`else:`
			`print(f"AI server error, {x.status_code}")`
added async task worker, state save 2022-08-27 07:38:54 +03:00
fixed parser, added file processing state 2022-08-27 10:16:21 +03:00			`return f"ok, {pk}"`
implemented word endpoints 2022-08-27 11:59:23 +03:00

			`@shared_task()`
			`def process_word(pk: int):`
			`file = WordDocx.objects.get(pk=pk)`
			`uuid = file.uuid`
			`paragraphs = process_word_paragraphs(file.text.tobytes().decode())`
			`print(paragraphs)`

			`file.paragraphs_loaded = len(paragraphs)`
			`file.save(update_fields=["paragraphs_loaded"])`

			`cut = 100`
			`counter = 0`
			`len_c = len(paragraphs)`
			`paragraphs = list(paragraphs.values())`
			`for i in range(0, len(paragraphs) // cut + 1):`
			`vals = paragraphs[i * cut : (i + 1) * cut + 1]`
			`dct = {x: vals[x] for x in range(len(vals))}`

			`x = requests.post("http://109.248.175.223:5000/api", json=dct)`
			`if x.status_code == 200:`
			`for el_id, dat in x.json().items():`
			`type_id, score = dat`
			`WordParagraph.objects.create(`
			`type_id=type_id, docx=file, text=dct[int(el_id)], score=score`
			`)`

			`counter += len(vals)`
			`print(f"processing {uuid}, {counter}/{len_c}")`
			`file.paragraphs_processed = counter`
			`file.save(update_fields=["paragraphs_processed"])`
			`else:`
			`print(f"AI server error, {x.status_code}")`

			`return f"ok, {pk}"`