文件需转化返回内容

This commit is contained in:
junleea 2025-04-10 16:37:38 +08:00
parent 0a23a77f78
commit 61f40aa983
2 changed files with 22 additions and 8 deletions

View File

@@ -43,7 +43,7 @@ def extract_text_from_pdf(file_path):
print(f"读取 PDF 文件时出错: {e}")
return None
def extract_text_from_epub(epub_path, txt_path):
def extract_text_from_epub(epub_path):
# 加载EPUB文件
book = epub.read_epub(epub_path)
@@ -61,8 +61,9 @@ def extract_text_from_epub(epub_path, txt_path):
full_text += text + "\n"
# 将文本写入TXT文件
with open(txt_path, 'w', encoding='utf-8') as file:
file.write(full_text)
# with open(txt_path, 'w', encoding='utf-8') as file:
# file.write(full_text)
return full_text
def extract_text_from_doc(file_path):
"""从 DOC 文件中提取文本"""
@@ -130,6 +131,10 @@ def dowload_file(file_url, save_path):
#filepath为fileurl,从网络下载
def get_file_content(fileName):
save_path = '/home/saw/file/' + fileName
#查看文件是否存在
if os.path.exists(save_path):
print(f"文件已存在: {save_path}")
return extract_text(save_path)
file_url = 'https://pm.ljsea.top/tool/file/' + fileName
dowload_file(file_url, save_path)
"""获取文件内容"""

View File

@@ -89,9 +89,18 @@ def get_file_add_convert():
file_list = get_file()
if file_list:
for file in file_list:
#文件类型是否允许
if file["FileStoreName"].endswith(".txt") or file["FileStoreName"].endswith(".docx") or file["FileStoreName"].endswith(".pdf") or file["FileStoreName"].endswith(".epub"):
print("文件类型允许")
else:
print("文件类型不允许:", file["FileStoreName"])
continue
print("文件ID:", file["ID"])
print("文件名称:", file["FileName"], "\t", file["FileStoreName"])
text = fileBase.get_file_content(file["FileStoreName"])
if text is None:
print("文件内容获取失败")
return None
print("文件内容长度:", len(text))
#将文件内容上传
print(create_file_content(file["ID"], text))
@@ -120,5 +129,5 @@ def get_Kbase_server_id():
print(f"请求失败,状态码: {response.status_code}, 错误信息: {response.text}")
return None
# if __name__ == "__main__":
# get_file_add_convert()
if __name__ == "__main__":
get_file_add_convert()