mirror of
https://github.com/Alexander-D-Karpov/akarpov
synced 2024-11-28 22:53:42 +03:00
19 lines
473 B
Python
19 lines
473 B
Python
import chardet
|
|
import textract
|
|
from textract.exceptions import ExtensionNotSupported
|
|
|
|
|
|
def extract_file_text(file: str) -> str:
    """Extract the textual content of the file at path ``file``.

    The path is first handed to ``textract.process``. If textract raises
    ``ExtensionNotSupported``, the file is instead read as plain text using
    the encoding that ``chardet.detect`` reports for its raw bytes.

    Args:
        file: Path of the file to extract text from.

    Returns:
        The extracted text, or ``""`` when the plain-text fallback fails for
        any reason (unreadable file, undecodable content, ...).

    Raises:
        Exception: Any error raised by ``textract.process`` other than
            ``ExtensionNotSupported`` is not caught here and propagates.
    """
    try:
        # NOTE(review): the textract result is returned unchanged; textract
        # appears to return bytes here, which would not match the ``-> str``
        # annotation -- confirm what callers expect before decoding.
        return textract.process(file)
    except ExtensionNotSupported:
        # textract has no parser for this extension; fall back to plain text.
        pass

    try:
        # Read the raw bytes inside a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(file, "rb") as raw_handle:
            rawdata = raw_handle.read()
        detected = chardet.detect(rawdata)
        # Use a distinct name for the handle so the ``file`` path argument is
        # not rebound to a file object.
        # If the detected encoding is None, open() uses its default encoding.
        with open(file, encoding=detected["encoding"]) as text_handle:
            return text_handle.read()
    except Exception:
        # Deliberate best effort: any failure in the fallback yields "".
        return ""
|