mirror of
https://github.com/Ai-hack-MAGNUM-OPUS/backend.git
synced 2024-11-24 01:03:44 +03:00
optimised docx highlighter
This commit is contained in:
parent
1e6c23477e
commit
522a733c35
|
@ -25,7 +25,6 @@ def process_paragraphs(text):
|
|||
|
||||
def process_word_paragraphs(text):
|
||||
text = text.split("\\r")
|
||||
print(text)
|
||||
return _base_process(text)
|
||||
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ def process_file(pk: int):
|
|||
|
||||
cut = 100
|
||||
counter = 0
|
||||
len_c = len(paragraphs)
|
||||
len_c = len(paragraphs) + 1
|
||||
paragraphs = list(paragraphs.values())
|
||||
for i in range(0, len(paragraphs) // cut + 1):
|
||||
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
||||
|
@ -49,15 +49,14 @@ def process_word(pk: int):
|
|||
file = WordDocx.objects.get(pk=pk)
|
||||
uuid = file.uuid
|
||||
paragraphs = process_word_paragraphs(file.text.tobytes().decode())
|
||||
print(paragraphs)
|
||||
|
||||
file.paragraphs_loaded = len(paragraphs)
|
||||
file.save(update_fields=["paragraphs_loaded"])
|
||||
|
||||
cut = 100
|
||||
counter = 0
|
||||
len_c = len(paragraphs)
|
||||
cut = 150
|
||||
len_c = len(paragraphs) + 1
|
||||
paragraphs = list(paragraphs.values())
|
||||
counter = 0
|
||||
for i in range(0, len(paragraphs) // cut + 1):
|
||||
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
||||
dct = {x: vals[x] for x in range(len(vals))}
|
||||
|
@ -83,25 +82,31 @@ def process_word(pk: int):
|
|||
@shared_task
|
||||
def highlight_file(pk: int):
|
||||
c = 0
|
||||
title = True
|
||||
lim = 0
|
||||
file = Docx.objects.get(pk=pk)
|
||||
document = Document(file.file.path)
|
||||
|
||||
for paragraph in document.paragraphs:
|
||||
if title:
|
||||
if (
|
||||
paragraph.text
|
||||
and len(paragraph.text) > 2
|
||||
and paragraph.text[:2] == "1."
|
||||
):
|
||||
title = False
|
||||
else:
|
||||
if paragraph.text:
|
||||
x = requests.post(
|
||||
"http://109.248.175.223:5000/api", json={1: paragraph.text}
|
||||
)
|
||||
paragraphs = document.paragraphs
|
||||
cut = 100
|
||||
|
||||
for paragraph in paragraphs:
|
||||
if paragraph.text and len(paragraph.text) > 2 and paragraph.text[:2] == "1.":
|
||||
break
|
||||
lim += 1
|
||||
for i in range(0, len(paragraphs) // cut + 1):
|
||||
paragraphs_sliced = paragraphs[i * cut + lim : (i + 1) * cut + lim + 1]
|
||||
dct = {x: paragraphs_sliced[x].text for x in range(len(paragraphs_sliced))}
|
||||
n_dct = {}
|
||||
for el, dat in dct.items():
|
||||
if dat:
|
||||
n_dct[el] = dat
|
||||
x = requests.post("http://109.248.175.223:5000/api", json=n_dct)
|
||||
jsn = x.json()
|
||||
if x.status_code == 200:
|
||||
el_id, dat = x.json()["1"]
|
||||
for j in range(len(paragraphs_sliced)):
|
||||
if j in n_dct:
|
||||
paragraph = paragraphs_sliced[j]
|
||||
el_id, dat = jsn[str(j)]
|
||||
if dat < 50:
|
||||
text = paragraph.text
|
||||
paragraph.clear()
|
||||
|
@ -110,6 +115,6 @@ def highlight_file(pk: int):
|
|||
run.add_text(text)
|
||||
c += 1
|
||||
else:
|
||||
print("AI ERROR")
|
||||
print("AI server error")
|
||||
document.save(file.file.path)
|
||||
return f"highlighted {c}, {pk}"
|
||||
|
|
Loading…
Reference in New Issue
Block a user