mirror of
https://github.com/magnum-opus-nn-cp/backend.git
synced 2024-12-04 20:53:44 +03:00
updated score calc
This commit is contained in:
parent
1da26e26a9
commit
f08a5b93eb
|
@ -313,9 +313,9 @@
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std:setting-result_serializer
|
||||||
CELERY_RESULT_SERIALIZER = "json"
|
CELERY_RESULT_SERIALIZER = "json"
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-time-limit
|
||||||
CELERY_TASK_TIME_LIMIT = 5 * 60
|
CELERY_TASK_TIME_LIMIT = 10 * 60
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-soft-time-limit
|
||||||
CELERY_TASK_SOFT_TIME_LIMIT = 60
|
CELERY_TASK_SOFT_TIME_LIMIT = 5 * 60
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-scheduler
|
||||||
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
|
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#worker-send-task-events
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
|
||||||
from press_release_nl.processor.models import Text
|
from press_release_nl.processor.models import Entry, Text
|
||||||
from press_release_nl.processor.tasks import load_text, load_text_sum
|
from press_release_nl.processor.tasks import load_text, load_text_sum, run_ml
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, sender=Text)
|
@receiver(post_save, sender=Text)
|
||||||
|
@ -21,5 +21,11 @@ def run_text_process(sender, instance: Text, created, **kwargs):
|
||||||
Text.objects.create(entry=instance.entry, text=text)
|
Text.objects.create(entry=instance.entry, text=text)
|
||||||
instance.delete()
|
instance.delete()
|
||||||
return
|
return
|
||||||
load_text.apply_async(kwargs={"pk": instance.pk}, countdown=2)
|
load_text.apply_async(kwargs={"pk": instance.pk}, countdown=1)
|
||||||
load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4)
|
load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4)
|
||||||
|
|
||||||
|
|
||||||
|
@receiver(post_save, sender=Entry)
|
||||||
|
def run_entry_ml(sender, instance: Entry, created, **kwargs):
|
||||||
|
if created:
|
||||||
|
run_ml.apply_async(kwargs={"pk": instance.pk}, countdown=4)
|
||||||
|
|
|
@ -4,9 +4,9 @@
|
||||||
import textract
|
import textract
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
|
|
||||||
from press_release_nl.processor.models import Text
|
from press_release_nl.processor.models import Entry, Text
|
||||||
|
|
||||||
ML_HOST = "https://2b6a-176-59-106-6.ngrok-free.app/"
|
ML_HOST = "http://192.168.107.95:8000/"
|
||||||
ML_SUM_HOST = "https://dev.akarpov.ru/"
|
ML_SUM_HOST = "https://dev.akarpov.ru/"
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,24 +18,51 @@ def load_text(pk: int):
|
||||||
text.file.path, encoding="unicode_escape", language="rus"
|
text.file.path, encoding="unicode_escape", language="rus"
|
||||||
).decode()
|
).decode()
|
||||||
text.save()
|
text.save()
|
||||||
re = requests.post(ML_HOST + "predict", json={"data": text.text})
|
if not text.text:
|
||||||
if re.status_code != 200:
|
text.delete()
|
||||||
raise ValueError(re.text)
|
return
|
||||||
text.score = re.json()
|
|
||||||
text.save()
|
|
||||||
|
@shared_task
|
||||||
|
def run_ml(pk: int, f=True):
|
||||||
|
try:
|
||||||
|
entry = Entry.objects.get(pk=pk)
|
||||||
|
except Entry.DoesNotExist:
|
||||||
|
return
|
||||||
|
if entry.texts.filter(text__isnull=True).exists():
|
||||||
|
sleep(10)
|
||||||
|
for text in entry.texts.all():
|
||||||
|
re_bert = requests.post(ML_HOST + "bert/process", json={"data": text.text})
|
||||||
|
re_tf = requests.post(ML_HOST + "tfidf/process", json={"data": text.text})
|
||||||
|
if re_bert.status_code != 200:
|
||||||
|
print(re_bert.status_code, "bert")
|
||||||
|
continue
|
||||||
|
if re_tf.status_code != 200:
|
||||||
|
print(re_tf.status_code, "tf-idf")
|
||||||
|
continue
|
||||||
|
text.refresh_from_db()
|
||||||
|
text.score = {
|
||||||
|
"bert": re_bert.json(),
|
||||||
|
"f": re_tf.json(),
|
||||||
|
}
|
||||||
|
text.save(update_fields=["score"])
|
||||||
return pk
|
return pk
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
def load_text_sum(pk: int):
|
def load_text_sum(pk: int):
|
||||||
text = Text.objects.get(pk=pk)
|
try:
|
||||||
|
text = Text.objects.get(pk=pk)
|
||||||
|
except Text.DoesNotExist:
|
||||||
|
return
|
||||||
if not text.text:
|
if not text.text:
|
||||||
sleep(3)
|
sleep(3)
|
||||||
text.refresh_from_db()
|
text.refresh_from_db()
|
||||||
re = requests.post(ML_SUM_HOST, json={"body": text.text})
|
re = requests.post(ML_SUM_HOST, json={"body": text.text})
|
||||||
if re.status_code != 200:
|
if re.status_code != 200:
|
||||||
raise ValueError(re.text)
|
raise ValueError(re.status_code)
|
||||||
data = re.json()
|
data = re.json()
|
||||||
|
text.refresh_from_db()
|
||||||
text.summery = str(data)
|
text.summery = str(data)
|
||||||
text.save()
|
text.save(update_fields=["summery"])
|
||||||
return pk
|
return pk
|
||||||
|
|
Loading…
Reference in New Issue
Block a user