文件需转化返回内容
This commit is contained in:
parent
0a23a77f78
commit
61f40aa983
|
|
@ -43,7 +43,7 @@ def extract_text_from_pdf(file_path):
|
|||
print(f"读取 PDF 文件时出错: {e}")
|
||||
return None
|
||||
|
||||
def extract_text_from_epub(epub_path, txt_path):
|
||||
def extract_text_from_epub(epub_path):
|
||||
# 加载EPUB文件
|
||||
book = epub.read_epub(epub_path)
|
||||
|
||||
|
|
@ -61,8 +61,9 @@ def extract_text_from_epub(epub_path, txt_path):
|
|||
full_text += text + "\n"
|
||||
|
||||
# 将文本写入TXT文件
|
||||
with open(txt_path, 'w', encoding='utf-8') as file:
|
||||
file.write(full_text)
|
||||
# with open(txt_path, 'w', encoding='utf-8') as file:
|
||||
# file.write(full_text)
|
||||
return full_text
|
||||
|
||||
def extract_text_from_doc(file_path):
|
||||
"""从 DOC 文件中提取文本"""
|
||||
|
|
@ -130,6 +131,10 @@ def dowload_file(file_url, save_path):
|
|||
#filepath为fileurl,从网络下载
|
||||
def get_file_content(fileName):
|
||||
save_path = '/home/saw/file/' + fileName
|
||||
#查看文件是否存在
|
||||
if os.path.exists(save_path):
|
||||
print(f"文件已存在: {save_path}")
|
||||
return extract_text(save_path)
|
||||
file_url = 'https://pm.ljsea.top/tool/file/' + fileName
|
||||
dowload_file(file_url, save_path)
|
||||
"""获取文件内容"""
|
||||
|
|
|
|||
|
|
@ -89,9 +89,18 @@ def get_file_add_convert():
|
|||
file_list = get_file()
|
||||
if file_list:
|
||||
for file in file_list:
|
||||
#文件类型是否允许
|
||||
if file["FileStoreName"].endswith(".txt") or file["FileStoreName"].endswith(".docx") or file["FileStoreName"].endswith(".pdf") or file["FileStoreName"].endswith(".epub"):
|
||||
print("文件类型允许")
|
||||
else:
|
||||
print("文件类型不允许:", file["FileStoreName"])
|
||||
continue
|
||||
print("文件ID:", file["ID"])
|
||||
print("文件名称:", file["FileName"], "\t", file["FileStoreName"])
|
||||
text = fileBase.get_file_content(file["FileStoreName"])
|
||||
if text is None:
|
||||
print("文件内容获取失败")
|
||||
return None
|
||||
print("文件内容长度:", len(text))
|
||||
#将文件内容上传
|
||||
print(create_file_content(file["ID"], text))
|
||||
|
|
@ -120,5 +129,5 @@ def get_Kbase_server_id():
|
|||
print(f"请求失败,状态码: {response.status_code}, 错误信息: {response.text}")
|
||||
return None
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# get_file_add_convert()
|
||||
if __name__ == "__main__":
|
||||
get_file_add_convert()
|
||||
Loading…
Reference in New Issue