mirror of
https://github.com/magnum-opus-nn-cp/backend.git
synced 2024-12-05 05:03:44 +03:00
added xlsx parse, ip lookup
This commit is contained in:
parent
59234a5f2c
commit
58d2efa707
|
@ -11,9 +11,15 @@
|
||||||
default="LnQa85vE4W235BIYizBVnsOPJOfrBxjpdrgWtmDNaUMEIbDCxwySRuyp4hpmJMZ2",
|
default="LnQa85vE4W235BIYizBVnsOPJOfrBxjpdrgWtmDNaUMEIbDCxwySRuyp4hpmJMZ2",
|
||||||
)
|
)
|
||||||
# https://docs.djangoproject.com/en/dev/ref/settings/#allowed-hosts
|
# https://docs.djangoproject.com/en/dev/ref/settings/#allowed-hosts
|
||||||
ALLOWED_HOSTS = ["localhost", "0.0.0.0", "127.0.0.1", "192.168.107.4"]
|
ALLOWED_HOSTS = [
|
||||||
|
"localhost",
|
||||||
|
"0.0.0.0",
|
||||||
|
"127.0.0.1",
|
||||||
|
"192.168.103.224",
|
||||||
|
"192.168.107.4",
|
||||||
|
]
|
||||||
CORS_ORIGIN_ALLOW_ALL = True
|
CORS_ORIGIN_ALLOW_ALL = True
|
||||||
CSRF_TRUSTED_ORIGINS = ["http://192.168.107.4"]
|
CSRF_TRUSTED_ORIGINS = ["http://192.168.103.224", "http://192.168.107.4"]
|
||||||
|
|
||||||
# WhiteNoise
|
# WhiteNoise
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
45
poetry.lock
generated
45
poetry.lock
generated
|
@ -1070,6 +1070,18 @@ files = [
|
||||||
{file = "ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1"},
|
{file = "ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "et-xmlfile"
|
||||||
|
version = "1.1.0"
|
||||||
|
description = "An implementation of lxml.xmlfile for the standard library"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
|
||||||
|
{file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "executing"
|
name = "executing"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
@ -1786,6 +1798,21 @@ files = [
|
||||||
{file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"},
|
{file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "openpyxl"
|
||||||
|
version = "3.1.2"
|
||||||
|
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
|
||||||
|
{file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
et-xmlfile = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
version = "23.1"
|
version = "23.1"
|
||||||
|
@ -2280,6 +2307,22 @@ files = [
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
pylint = ">=1.7"
|
pylint = ">=1.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytesseract"
|
||||||
|
version = "0.3.10"
|
||||||
|
description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "pytesseract-0.3.10-py3-none-any.whl", hash = "sha256:8f22cc98f765bf13517ead0c70effedb46c153540d25783e04014f28b55a5fc6"},
|
||||||
|
{file = "pytesseract-0.3.10.tar.gz", hash = "sha256:f1c3a8b0f07fd01a1085d451f5b8315be6eec1d5577a6796d46dc7a62bd4120f"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
packaging = ">=21.3"
|
||||||
|
Pillow = ">=8.0.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pytest"
|
name = "pytest"
|
||||||
version = "7.4.2"
|
version = "7.4.2"
|
||||||
|
@ -3310,4 +3353,4 @@ files = [
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "5bcd52daf4504209b9936143d506ff50fae1f094bae17686a71ddd01efb9c49f"
|
content-hash = "fa7686c29a2d587dbafb35cec00bf4da4424047410560854cec79f904dc9de97"
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from django.core.files.uploadedfile import InMemoryUploadedFile, TemporaryUploadedFile
|
from django.core.files.uploadedfile import InMemoryUploadedFile, TemporaryUploadedFile
|
||||||
|
from drf_spectacular.utils import extend_schema_field
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
from press_release_nl.processor.models import Entry, Text
|
from press_release_nl.processor.models import Entry, Text
|
||||||
|
@ -65,14 +66,22 @@ def create(self, validated_data):
|
||||||
class ProcessedTextSerializer(serializers.ModelSerializer):
|
class ProcessedTextSerializer(serializers.ModelSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Text
|
model = Text
|
||||||
fields = ["text", "score"]
|
fields = ["id", "summery", "text", "score"]
|
||||||
|
|
||||||
|
|
||||||
class EntrySerializer(serializers.ModelSerializer):
|
class EntrySerializer(serializers.ModelSerializer):
|
||||||
texts = ProcessedTextSerializer(many=True)
|
texts = serializers.SerializerMethodField(method_name="get_texts")
|
||||||
done = serializers.IntegerField(source="texts_done_count")
|
current = serializers.IntegerField(source="texts_done_count")
|
||||||
count = serializers.IntegerField(source="texts_count")
|
total = serializers.IntegerField(source="texts_count")
|
||||||
|
|
||||||
|
@extend_schema_field(ProcessedTextSerializer(many=True))
|
||||||
|
def get_texts(self, obj: Entry):
|
||||||
|
id = self.context["request"].query_params.get("id")
|
||||||
|
q = obj.texts.all()
|
||||||
|
if id:
|
||||||
|
q = q.filter(id=id)
|
||||||
|
return ProcessedTextSerializer(many=True).to_representation(q)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Entry
|
model = Entry
|
||||||
fields = ["texts", "done", "count", "created"]
|
fields = ["texts", "current", "total", "created"]
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
|
||||||
from rest_framework import generics, parsers, permissions, status
|
from rest_framework import generics, parsers, permissions, status
|
||||||
from rest_framework.response import Response
|
from rest_framework.response import Response
|
||||||
|
|
||||||
|
@ -25,6 +26,9 @@ def create(self, request, *args, **kwargs):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@extend_schema_view(
|
||||||
|
get=extend_schema(parameters=[OpenApiParameter(name="id", type=int)])
|
||||||
|
)
|
||||||
class RetrieveEntryApiView(generics.RetrieveAPIView):
|
class RetrieveEntryApiView(generics.RetrieveAPIView):
|
||||||
queryset = Entry.objects.all()
|
queryset = Entry.objects.all()
|
||||||
permission_classes = [permissions.AllowAny]
|
permission_classes = [permissions.AllowAny]
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Generated by Django 4.2.5 on 2023-09-08 18:42
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("processor", "0001_initial"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="text",
|
||||||
|
name="summery",
|
||||||
|
field=models.TextField(blank=True, max_length=2000, null=True),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="text",
|
||||||
|
name="file",
|
||||||
|
field=models.FileField(blank=True, null=True, upload_to="uploads/"),
|
||||||
|
),
|
||||||
|
]
|
|
@ -12,7 +12,7 @@ def __str__(self):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def texts_done_count(self):
|
def texts_done_count(self):
|
||||||
return len(self.texts.filter(score__isnull=False))
|
return len(self.texts.filter(score__isnull=False, summery__isnull=False))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def texts_count(self):
|
def texts_count(self):
|
||||||
|
@ -21,6 +21,7 @@ def texts_count(self):
|
||||||
|
|
||||||
class Text(models.Model):
|
class Text(models.Model):
|
||||||
entry = models.ForeignKey("Entry", related_name="texts", on_delete=models.CASCADE)
|
entry = models.ForeignKey("Entry", related_name="texts", on_delete=models.CASCADE)
|
||||||
|
summery = models.TextField(max_length=2000, blank=True, null=True)
|
||||||
file = models.FileField(blank=True, null=True, upload_to="uploads/")
|
file = models.FileField(blank=True, null=True, upload_to="uploads/")
|
||||||
text = models.TextField(blank=True, null=True, max_length=25_000)
|
text = models.TextField(blank=True, null=True, max_length=25_000)
|
||||||
score = models.JSONField(null=True)
|
score = models.JSONField(null=True)
|
||||||
|
|
|
@ -1,11 +1,25 @@
|
||||||
|
import openpyxl
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
|
||||||
from press_release_nl.processor.models import Text
|
from press_release_nl.processor.models import Text
|
||||||
from press_release_nl.processor.tasks import load_text
|
from press_release_nl.processor.tasks import load_text, load_text_sum
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, sender=Text)
|
@receiver(post_save, sender=Text)
|
||||||
def run_text_process(sender, instance: Text, created, **kwargs):
|
def run_text_process(sender, instance: Text, created, **kwargs):
|
||||||
if created:
|
if created:
|
||||||
load_text.apply_async(kwargs={"pk": instance.pk}, countdown=1)
|
if instance.file and instance.file.path.endswith("xlsx"):
|
||||||
|
wb_obj = openpyxl.load_workbook(instance.file.path)
|
||||||
|
sheet = wb_obj.worksheets[0]
|
||||||
|
for column in sheet.iter_cols():
|
||||||
|
column_name = column[0].value
|
||||||
|
if column_name == "pr_txt":
|
||||||
|
for text in column:
|
||||||
|
text = text.value
|
||||||
|
if text and text != "pr_txt":
|
||||||
|
Text.objects.create(entry=instance.entry, text=text)
|
||||||
|
instance.delete()
|
||||||
|
return
|
||||||
|
load_text.apply_async(kwargs={"pk": instance.pk}, countdown=2)
|
||||||
|
load_text_sum.apply_async(kwargs={"pk": instance.pk}, countdown=4)
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import textract
|
import textract
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
|
@ -5,13 +7,16 @@
|
||||||
from press_release_nl.processor.models import Text
|
from press_release_nl.processor.models import Text
|
||||||
|
|
||||||
ML_HOST = "https://2b6a-176-59-106-6.ngrok-free.app/"
|
ML_HOST = "https://2b6a-176-59-106-6.ngrok-free.app/"
|
||||||
|
ML_SUM_HOST = "https://dev.akarpov.ru/"
|
||||||
|
|
||||||
|
|
||||||
@shared_task
|
@shared_task
|
||||||
def load_text(pk: int):
|
def load_text(pk: int):
|
||||||
text = Text.objects.get(pk=pk)
|
text = Text.objects.get(pk=pk)
|
||||||
if not text.text:
|
if not text.text:
|
||||||
text.text = textract.process(text.file.path, encoding="unicode_escape").decode()
|
text.text = textract.process(
|
||||||
|
text.file.path, encoding="unicode_escape", language="rus"
|
||||||
|
).decode()
|
||||||
text.save()
|
text.save()
|
||||||
re = requests.post(ML_HOST + "predict", json={"data": text.text})
|
re = requests.post(ML_HOST + "predict", json={"data": text.text})
|
||||||
if re.status_code != 200:
|
if re.status_code != 200:
|
||||||
|
@ -19,3 +24,18 @@ def load_text(pk: int):
|
||||||
text.score = re.json()
|
text.score = re.json()
|
||||||
text.save()
|
text.save()
|
||||||
return pk
|
return pk
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task
|
||||||
|
def load_text_sum(pk: int):
|
||||||
|
text = Text.objects.get(pk=pk)
|
||||||
|
if not text.text:
|
||||||
|
sleep(3)
|
||||||
|
text.refresh_from_db()
|
||||||
|
re = requests.post(ML_SUM_HOST, json={"body": text.text})
|
||||||
|
if re.status_code != 200:
|
||||||
|
raise ValueError(re.text)
|
||||||
|
data = re.json()
|
||||||
|
text.summery = str(data)
|
||||||
|
text.save()
|
||||||
|
return pk
|
||||||
|
|
|
@ -48,6 +48,8 @@ django-coverage-plugin = "^3.0.0"
|
||||||
pytest-django = "^4.5.2"
|
pytest-django = "^4.5.2"
|
||||||
sentry-sdk = "^1.12.0"
|
sentry-sdk = "^1.12.0"
|
||||||
textract = "^1.6.5"
|
textract = "^1.6.5"
|
||||||
|
pytesseract = "^0.3.10"
|
||||||
|
openpyxl = "^3.1.2"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user