mirror of
https://github.com/Ai-hack-MAGNUM-OPUS/backend.git
synced 2024-11-24 09:13:44 +03:00
optimised docx highlighter
This commit is contained in:
parent
1e6c23477e
commit
522a733c35
|
@ -25,7 +25,6 @@ def process_paragraphs(text):
|
||||||
|
|
||||||
def process_word_paragraphs(text):
|
def process_word_paragraphs(text):
|
||||||
text = text.split("\\r")
|
text = text.split("\\r")
|
||||||
print(text)
|
|
||||||
return _base_process(text)
|
return _base_process(text)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ def process_file(pk: int):
|
||||||
|
|
||||||
cut = 100
|
cut = 100
|
||||||
counter = 0
|
counter = 0
|
||||||
len_c = len(paragraphs)
|
len_c = len(paragraphs) + 1
|
||||||
paragraphs = list(paragraphs.values())
|
paragraphs = list(paragraphs.values())
|
||||||
for i in range(0, len(paragraphs) // cut + 1):
|
for i in range(0, len(paragraphs) // cut + 1):
|
||||||
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
||||||
|
@ -49,15 +49,14 @@ def process_word(pk: int):
|
||||||
file = WordDocx.objects.get(pk=pk)
|
file = WordDocx.objects.get(pk=pk)
|
||||||
uuid = file.uuid
|
uuid = file.uuid
|
||||||
paragraphs = process_word_paragraphs(file.text.tobytes().decode())
|
paragraphs = process_word_paragraphs(file.text.tobytes().decode())
|
||||||
print(paragraphs)
|
|
||||||
|
|
||||||
file.paragraphs_loaded = len(paragraphs)
|
file.paragraphs_loaded = len(paragraphs)
|
||||||
file.save(update_fields=["paragraphs_loaded"])
|
file.save(update_fields=["paragraphs_loaded"])
|
||||||
|
|
||||||
cut = 100
|
cut = 150
|
||||||
counter = 0
|
len_c = len(paragraphs) + 1
|
||||||
len_c = len(paragraphs)
|
|
||||||
paragraphs = list(paragraphs.values())
|
paragraphs = list(paragraphs.values())
|
||||||
|
counter = 0
|
||||||
for i in range(0, len(paragraphs) // cut + 1):
|
for i in range(0, len(paragraphs) // cut + 1):
|
||||||
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
vals = paragraphs[i * cut : (i + 1) * cut + 1]
|
||||||
dct = {x: vals[x] for x in range(len(vals))}
|
dct = {x: vals[x] for x in range(len(vals))}
|
||||||
|
@ -83,25 +82,31 @@ def process_word(pk: int):
|
||||||
@shared_task
|
@shared_task
|
||||||
def highlight_file(pk: int):
|
def highlight_file(pk: int):
|
||||||
c = 0
|
c = 0
|
||||||
title = True
|
lim = 0
|
||||||
file = Docx.objects.get(pk=pk)
|
file = Docx.objects.get(pk=pk)
|
||||||
document = Document(file.file.path)
|
document = Document(file.file.path)
|
||||||
|
|
||||||
for paragraph in document.paragraphs:
|
paragraphs = document.paragraphs
|
||||||
if title:
|
cut = 100
|
||||||
if (
|
|
||||||
paragraph.text
|
for paragraph in paragraphs:
|
||||||
and len(paragraph.text) > 2
|
if paragraph.text and len(paragraph.text) > 2 and paragraph.text[:2] == "1.":
|
||||||
and paragraph.text[:2] == "1."
|
break
|
||||||
):
|
lim += 1
|
||||||
title = False
|
for i in range(0, len(paragraphs) // cut + 1):
|
||||||
else:
|
paragraphs_sliced = paragraphs[i * cut + lim : (i + 1) * cut + lim + 1]
|
||||||
if paragraph.text:
|
dct = {x: paragraphs_sliced[x].text for x in range(len(paragraphs_sliced))}
|
||||||
x = requests.post(
|
n_dct = {}
|
||||||
"http://109.248.175.223:5000/api", json={1: paragraph.text}
|
for el, dat in dct.items():
|
||||||
)
|
if dat:
|
||||||
|
n_dct[el] = dat
|
||||||
|
x = requests.post("http://109.248.175.223:5000/api", json=n_dct)
|
||||||
|
jsn = x.json()
|
||||||
if x.status_code == 200:
|
if x.status_code == 200:
|
||||||
el_id, dat = x.json()["1"]
|
for j in range(len(paragraphs_sliced)):
|
||||||
|
if j in n_dct:
|
||||||
|
paragraph = paragraphs_sliced[j]
|
||||||
|
el_id, dat = jsn[str(j)]
|
||||||
if dat < 50:
|
if dat < 50:
|
||||||
text = paragraph.text
|
text = paragraph.text
|
||||||
paragraph.clear()
|
paragraph.clear()
|
||||||
|
@ -110,6 +115,6 @@ def highlight_file(pk: int):
|
||||||
run.add_text(text)
|
run.add_text(text)
|
||||||
c += 1
|
c += 1
|
||||||
else:
|
else:
|
||||||
print("AI ERROR")
|
print("AI server error")
|
||||||
document.save(file.file.path)
|
document.save(file.file.path)
|
||||||
return f"highlighted {c}, {pk}"
|
return f"highlighted {c}, {pk}"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user