mirror of
				https://github.com/Ai-hack-MAGNUM-OPUS/backend.git
				synced 2025-10-25 21:11:08 +03:00 
			
		
		
		
	optimised docx highlighter
This commit is contained in:
		
							parent
							
								
									1e6c23477e
								
							
						
					
					
						commit
						522a733c35
					
				|  | @ -25,7 +25,6 @@ def process_paragraphs(text): | |||
| 
 | ||||
| def process_word_paragraphs(text): | ||||
|     text = text.split("\\r") | ||||
|     print(text) | ||||
|     return _base_process(text) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -20,7 +20,7 @@ def process_file(pk: int): | |||
| 
 | ||||
|     cut = 100 | ||||
|     counter = 0 | ||||
|     len_c = len(paragraphs) | ||||
|     len_c = len(paragraphs) + 1 | ||||
|     paragraphs = list(paragraphs.values()) | ||||
|     for i in range(0, len(paragraphs) // cut + 1): | ||||
|         vals = paragraphs[i * cut : (i + 1) * cut + 1] | ||||
|  | @ -49,15 +49,14 @@ def process_word(pk: int): | |||
|     file = WordDocx.objects.get(pk=pk) | ||||
|     uuid = file.uuid | ||||
|     paragraphs = process_word_paragraphs(file.text.tobytes().decode()) | ||||
|     print(paragraphs) | ||||
| 
 | ||||
|     file.paragraphs_loaded = len(paragraphs) | ||||
|     file.save(update_fields=["paragraphs_loaded"]) | ||||
| 
 | ||||
|     cut = 100 | ||||
|     counter = 0 | ||||
|     len_c = len(paragraphs) | ||||
|     cut = 150 | ||||
|     len_c = len(paragraphs) + 1 | ||||
|     paragraphs = list(paragraphs.values()) | ||||
|     counter = 0 | ||||
|     for i in range(0, len(paragraphs) // cut + 1): | ||||
|         vals = paragraphs[i * cut : (i + 1) * cut + 1] | ||||
|         dct = {x: vals[x] for x in range(len(vals))} | ||||
|  | @ -83,25 +82,31 @@ def process_word(pk: int): | |||
| @shared_task | ||||
| def highlight_file(pk: int): | ||||
|     c = 0 | ||||
|     title = True | ||||
|     lim = 0 | ||||
|     file = Docx.objects.get(pk=pk) | ||||
|     document = Document(file.file.path) | ||||
| 
 | ||||
|     for paragraph in document.paragraphs: | ||||
|         if title: | ||||
|             if ( | ||||
|                 paragraph.text | ||||
|                 and len(paragraph.text) > 2 | ||||
|                 and paragraph.text[:2] == "1." | ||||
|             ): | ||||
|                 title = False | ||||
|         else: | ||||
|             if paragraph.text: | ||||
|                 x = requests.post( | ||||
|                     "http://109.248.175.223:5000/api", json={1: paragraph.text} | ||||
|                 ) | ||||
|     paragraphs = document.paragraphs | ||||
|     cut = 100 | ||||
| 
 | ||||
|     for paragraph in paragraphs: | ||||
|         if paragraph.text and len(paragraph.text) > 2 and paragraph.text[:2] == "1.": | ||||
|             break | ||||
|         lim += 1 | ||||
|     for i in range(0, len(paragraphs) // cut + 1): | ||||
|         paragraphs_sliced = paragraphs[i * cut + lim : (i + 1) * cut + lim + 1] | ||||
|         dct = {x: paragraphs_sliced[x].text for x in range(len(paragraphs_sliced))} | ||||
|         n_dct = {} | ||||
|         for el, dat in dct.items(): | ||||
|             if dat: | ||||
|                 n_dct[el] = dat | ||||
|         x = requests.post("http://109.248.175.223:5000/api", json=n_dct) | ||||
|         jsn = x.json() | ||||
|         if x.status_code == 200: | ||||
|                     el_id, dat = x.json()["1"] | ||||
|             for j in range(len(paragraphs_sliced)): | ||||
|                 if j in n_dct: | ||||
|                     paragraph = paragraphs_sliced[j] | ||||
|                     el_id, dat = jsn[str(j)] | ||||
|                     if dat < 50: | ||||
|                         text = paragraph.text | ||||
|                         paragraph.clear() | ||||
|  | @ -110,6 +115,6 @@ def highlight_file(pk: int): | |||
|                         run.add_text(text) | ||||
|                         c += 1 | ||||
|         else: | ||||
|                     print("AI ERROR") | ||||
|             print("AI server error") | ||||
|     document.save(file.file.path) | ||||
|     return f"highlighted {c}, {pk}" | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user