updated score calc

This commit is contained in:
Alexander Karpov 2023-09-09 10:07:17 +03:00
parent 1da26e26a9
commit f08a5b93eb
3 changed files with 48 additions and 15 deletions

View File

@ -313,9 +313,9 @@
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer
CELERY_RESULT_SERIALIZER = "json" CELERY_RESULT_SERIALIZER = "json"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit
CELERY_TASK_TIME_LIMIT = 5 * 60 CELERY_TASK_TIME_LIMIT = 10 * 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit # https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit
CELERY_TASK_SOFT_TIME_LIMIT = 60 CELERY_TASK_SOFT_TIME_LIMIT = 5 * 60
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler" CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events # https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events

View File

@ -2,8 +2,8 @@
from django.db.models.signals import post_save from django.db.models.signals import post_save
from django.dispatch import receiver from django.dispatch import receiver
from press_release_nl.processor.models import Text from press_release_nl.processor.models import Entry, Text
from press_release_nl.processor.tasks import load_text, load_text_sum from press_release_nl.processor.tasks import load_text, load_text_sum, run_ml
@receiver(post_save, sender=Text) @receiver(post_save, sender=Text)
@ -21,5 +21,11 @@ def run_text_process(sender, instance: Text, created, **kwargs):
Text.objects.create(entry=instance.entry, text=text) Text.objects.create(entry=instance.entry, text=text)
instance.delete() instance.delete()
return return
load_text.apply_async(kwargs={"pk": instance.pk}, countdown=2) load_text.apply_async(kwargs={"pk": instance.pk}, countdown=1)
load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4) load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4)
@receiver(post_save, sender=Entry)
def run_entry_ml(sender, instance: Entry, created, **kwargs):
    """Queue ML scoring for a freshly created ``Entry``.

    Registered as a ``post_save`` receiver for ``Entry``. Saves of already
    existing entries (``created`` is false) are ignored; for a new entry the
    ``run_ml`` task is scheduled with the entry's primary key and a
    4-second countdown.
    """
    if not created:
        return
    task_kwargs = {"pk": instance.pk}
    run_ml.apply_async(kwargs=task_kwargs, countdown=4)

View File

@ -4,9 +4,9 @@
import textract import textract
from celery import shared_task from celery import shared_task
from press_release_nl.processor.models import Text from press_release_nl.processor.models import Entry, Text
ML_HOST = "https://2b6a-176-59-106-6.ngrok-free.app/" ML_HOST = "http://192.168.107.95:8000/"
ML_SUM_HOST = "https://dev.akarpov.ru/" ML_SUM_HOST = "https://dev.akarpov.ru/"
@ -18,24 +18,51 @@ def load_text(pk: int):
text.file.path, encoding="unicode_escape", language="rus" text.file.path, encoding="unicode_escape", language="rus"
).decode() ).decode()
text.save() text.save()
re = requests.post(ML_HOST + "predict", json={"data": text.text}) if not text.text:
if re.status_code != 200: text.delete()
raise ValueError(re.text) return
text.score = re.json()
text.save()
@shared_task
def run_ml(pk: int, f=True):
    """Score every text of an entry with the BERT and TF-IDF services.

    For each text attached to the entry, the text body is posted to
    ``ML_HOST + "bert/process"`` and ``ML_HOST + "tfidf/process"``; when both
    answer with HTTP 200 the two JSON payloads are stored in ``text.score``
    under the keys ``"bert"`` and ``"f"``.

    Args:
        pk: Primary key of the ``Entry`` to process.
        f: Unused; kept so existing callers that pass it keep working.

    Returns:
        ``pk`` once the loop has finished, or ``None`` when no entry with
        that primary key exists.
    """
    try:
        entry = Entry.objects.get(pk=pk)
    except Entry.DoesNotExist:
        return

    # Some texts have no body yet; wait once before scoring
    # (presumably to let the text-extraction task finish - TODO confirm).
    if entry.texts.filter(text__isnull=True).exists():
        sleep(10)

    for text in entry.texts.all():
        if not text.text:
            # Still nothing to score; do not send a null payload to the services.
            continue

        payload = {"data": text.text}

        # NOTE(review): no timeout is passed to requests.post, so a stalled
        # service blocks the worker until the Celery time limit fires.
        re_bert = requests.post(ML_HOST + "bert/process", json=payload)
        if re_bert.status_code != 200:
            print(re_bert.status_code, "bert")
            # Skip the TF-IDF call: the score needs both results anyway.
            continue

        re_tf = requests.post(ML_HOST + "tfidf/process", json=payload)
        if re_tf.status_code != 200:
            print(re_tf.status_code, "tf-idf")
            continue

        try:
            # Reload so the in-memory row reflects changes made while the
            # HTTP calls were in flight.
            text.refresh_from_db()
        except Text.DoesNotExist:
            # Row was deleted in the meantime; carry on with the other texts
            # instead of aborting the whole task.
            continue

        text.score = {
            "bert": re_bert.json(),
            "f": re_tf.json(),
        }
        text.save(update_fields=["score"])
    return pk
@shared_task @shared_task
def load_text_sum(pk: int): def load_text_sum(pk: int):
text = Text.objects.get(pk=pk) try:
text = Text.objects.get(pk=pk)
except Text.DoesNotExist:
return
if not text.text: if not text.text:
sleep(3) sleep(3)
text.refresh_from_db() text.refresh_from_db()
re = requests.post(ML_SUM_HOST, json={"body": text.text}) re = requests.post(ML_SUM_HOST, json={"body": text.text})
if re.status_code != 200: if re.status_code != 200:
raise ValueError(re.text) raise ValueError(re.status_code)
data = re.json() data = re.json()
text.refresh_from_db()
text.summery = str(data) text.summery = str(data)
text.save() text.save(update_fields=["summery"])
return pk return pk