diff --git a/config/settings/base.py b/config/settings/base.py index 85ca163..a538492 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -313,9 +313,9 @@ # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer CELERY_RESULT_SERIALIZER = "json" # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit -CELERY_TASK_TIME_LIMIT = 5 * 60 +CELERY_TASK_TIME_LIMIT = 10 * 60 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit -CELERY_TASK_SOFT_TIME_LIMIT = 60 +CELERY_TASK_SOFT_TIME_LIMIT = 5 * 60 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler" # https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events diff --git a/press_release_nl/processor/signals.py b/press_release_nl/processor/signals.py index 9372761..0420f80 100644 --- a/press_release_nl/processor/signals.py +++ b/press_release_nl/processor/signals.py @@ -2,8 +2,8 @@ from django.db.models.signals import post_save from django.dispatch import receiver -from press_release_nl.processor.models import Text -from press_release_nl.processor.tasks import load_text, load_text_sum +from press_release_nl.processor.models import Entry, Text +from press_release_nl.processor.tasks import load_text, load_text_sum, run_ml @receiver(post_save, sender=Text) @@ -21,5 +21,11 @@ def run_text_process(sender, instance: Text, created, **kwargs): Text.objects.create(entry=instance.entry, text=text) instance.delete() return - load_text.apply_async(kwargs={"pk": instance.pk}, countdown=2) + load_text.apply_async(kwargs={"pk": instance.pk}, countdown=1) load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4) + + +@receiver(post_save, sender=Entry) +def run_entry_ml(sender, instance: Entry, created, **kwargs): + if created: + run_ml.apply_async(kwargs={"pk": instance.pk}, countdown=4) diff --git a/press_release_nl/processor/tasks.py b/press_release_nl/processor/tasks.py index f4eb9b7..e93bd64 100644 --- a/press_release_nl/processor/tasks.py +++ b/press_release_nl/processor/tasks.py @@ -4,9 +4,9 @@ import textract from celery import shared_task -from press_release_nl.processor.models import Text +from press_release_nl.processor.models import Entry, Text -ML_HOST = "https://2b6a-176-59-106-6.ngrok-free.app/" +ML_HOST = "http://192.168.107.95:8000/" ML_SUM_HOST = "https://dev.akarpov.ru/" @@ -18,24 +18,51 @@ def load_text(pk: int): text.file.path, encoding="unicode_escape", language="rus" ).decode() text.save() - re = requests.post(ML_HOST + "predict", json={"data": text.text}) - if re.status_code != 200: - raise ValueError(re.text) - text.score = re.json() - text.save() + if not text.text: + text.delete() + return + + +@shared_task +def run_ml(pk: int, f=True): + try: + entry = Entry.objects.get(pk=pk) + except Entry.DoesNotExist: + return + if entry.texts.filter(text__isnull=True).exists(): + sleep(10) + for text in entry.texts.all(): + re_bert = requests.post(ML_HOST + "bert/process", json={"data": text.text}) + re_tf = requests.post(ML_HOST + "tfidf/process", json={"data": text.text}) + if re_bert.status_code != 200: + print(re_bert.status_code, "bert") + continue + if re_tf.status_code != 200: + print(re_tf.status_code, "tf-idf") + continue + text.refresh_from_db() + text.score = { + "bert": re_bert.json(), + "f": re_tf.json(), + } + text.save(update_fields=["score"]) return pk @shared_task def load_text_sum(pk: int): - text = Text.objects.get(pk=pk) + try: + text = Text.objects.get(pk=pk) + except Text.DoesNotExist: + return if not text.text: sleep(3) text.refresh_from_db() re = requests.post(ML_SUM_HOST, json={"body": text.text}) if re.status_code != 200: - raise ValueError(re.text) + raise ValueError(re.status_code) data = re.json() + text.refresh_from_db() text.summery = str(data) - text.save() + text.save(update_fields=["summery"]) return pk