文件需转化返回内容
This commit is contained in:
parent
0a23a77f78
commit
61f40aa983
|
|
@ -43,7 +43,7 @@ def extract_text_from_pdf(file_path):
|
||||||
print(f"读取 PDF 文件时出错: {e}")
|
print(f"读取 PDF 文件时出错: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_text_from_epub(epub_path, txt_path):
|
def extract_text_from_epub(epub_path):
|
||||||
# 加载EPUB文件
|
# 加载EPUB文件
|
||||||
book = epub.read_epub(epub_path)
|
book = epub.read_epub(epub_path)
|
||||||
|
|
||||||
|
|
@ -61,8 +61,9 @@ def extract_text_from_epub(epub_path, txt_path):
|
||||||
full_text += text + "\n"
|
full_text += text + "\n"
|
||||||
|
|
||||||
# 将文本写入TXT文件
|
# 将文本写入TXT文件
|
||||||
with open(txt_path, 'w', encoding='utf-8') as file:
|
# with open(txt_path, 'w', encoding='utf-8') as file:
|
||||||
file.write(full_text)
|
# file.write(full_text)
|
||||||
|
return full_text
|
||||||
|
|
||||||
def extract_text_from_doc(file_path):
|
def extract_text_from_doc(file_path):
|
||||||
"""从 DOC 文件中提取文本"""
|
"""从 DOC 文件中提取文本"""
|
||||||
|
|
@ -130,6 +131,10 @@ def dowload_file(file_url, save_path):
|
||||||
#filepath为fileurl,从网络下载
|
#filepath为fileurl,从网络下载
|
||||||
def get_file_content(fileName):
|
def get_file_content(fileName):
|
||||||
save_path = '/home/saw/file/' + fileName
|
save_path = '/home/saw/file/' + fileName
|
||||||
|
#查看文件是否存在
|
||||||
|
if os.path.exists(save_path):
|
||||||
|
print(f"文件已存在: {save_path}")
|
||||||
|
return extract_text(save_path)
|
||||||
file_url = 'https://pm.ljsea.top/tool/file/' + fileName
|
file_url = 'https://pm.ljsea.top/tool/file/' + fileName
|
||||||
dowload_file(file_url, save_path)
|
dowload_file(file_url, save_path)
|
||||||
"""获取文件内容"""
|
"""获取文件内容"""
|
||||||
|
|
|
||||||
|
|
@ -89,9 +89,18 @@ def get_file_add_convert():
|
||||||
file_list = get_file()
|
file_list = get_file()
|
||||||
if file_list:
|
if file_list:
|
||||||
for file in file_list:
|
for file in file_list:
|
||||||
|
#文件类型是否允许
|
||||||
|
if file["FileStoreName"].endswith(".txt") or file["FileStoreName"].endswith(".docx") or file["FileStoreName"].endswith(".pdf") or file["FileStoreName"].endswith(".epub"):
|
||||||
|
print("文件类型允许")
|
||||||
|
else:
|
||||||
|
print("文件类型不允许:", file["FileStoreName"])
|
||||||
|
continue
|
||||||
print("文件ID:", file["ID"])
|
print("文件ID:", file["ID"])
|
||||||
print("文件名称:", file["FileName"], "\t", file["FileStoreName"])
|
print("文件名称:", file["FileName"], "\t", file["FileStoreName"])
|
||||||
text = fileBase.get_file_content(file["FileStoreName"])
|
text = fileBase.get_file_content(file["FileStoreName"])
|
||||||
|
if text is None:
|
||||||
|
print("文件内容获取失败")
|
||||||
|
return None
|
||||||
print("文件内容长度:", len(text))
|
print("文件内容长度:", len(text))
|
||||||
#将文件内容上传
|
#将文件内容上传
|
||||||
print(create_file_content(file["ID"], text))
|
print(create_file_content(file["ID"], text))
|
||||||
|
|
@ -120,5 +129,5 @@ def get_Kbase_server_id():
|
||||||
print(f"请求失败,状态码: {response.status_code}, 错误信息: {response.text}")
|
print(f"请求失败,状态码: {response.status_code}, 错误信息: {response.text}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# get_file_add_convert()
|
get_file_add_convert()
|
||||||
Loading…
Reference in New Issue