文件需转化返回内容

2025-04-10 16:37:38 +08:00 · 2025-04-10 16:37:38 +08:00 · 61f40aa983
parent 0a23a77f78
commit 61f40aa983
2 changed files with 22 additions and 8 deletions
--- a/kbase_py/fileBase.py
+++ b/kbase_py/fileBase.py
@@ -43,7 +43,7 @@ def extract_text_from_pdf(file_path):
        print(f"读取 PDF 文件时出错: {e}")
        return None
-def extract_text_from_epub(epub_path, txt_path):
+def extract_text_from_epub(epub_path):
    # 加载EPUB文件
    book = epub.read_epub(epub_path)
@@ -61,8 +61,9 @@ def extract_text_from_epub(epub_path, txt_path):
            full_text += text + "\n"
    # 将文本写入TXT文件
-    with open(txt_path, 'w', encoding='utf-8') as file:
+    # with open(txt_path, 'w', encoding='utf-8') as file:
-        file.write(full_text)
+    #     file.write(full_text)
    return full_text
 def extract_text_from_doc(file_path):
    """从 DOC 文件中提取文本"""
@@ -130,6 +131,10 @@ def dowload_file(file_url, save_path):
 #filepath为fileurl,从网络下载
 def get_file_content(fileName):
    save_path = '/home/saw/file/' + fileName
    #查看文件是否存在
    if os.path.exists(save_path):
        print(f"文件已存在: {save_path}")
        return extract_text(save_path)
    file_url = 'https://pm.ljsea.top/tool/file/' + fileName
    dowload_file(file_url, save_path)
    """获取文件内容"""
--- a/kbase_py/fileCon.py
+++ b/kbase_py/fileCon.py
@@ -89,9 +89,18 @@ def get_file_add_convert():
    file_list = get_file()
    if file_list:
        for file in file_list:
            #文件类型是否允许
            if file["FileStoreName"].endswith(".txt") or file["FileStoreName"].endswith(".docx") or file["FileStoreName"].endswith(".pdf") or file["FileStoreName"].endswith(".epub"):
                print("文件类型允许")
            else:
                print("文件类型不允许:", file["FileStoreName"])
                continue
            print("文件ID:", file["ID"])
            print("文件名称:",  file["FileName"],  "\t", file["FileStoreName"])
            text = fileBase.get_file_content(file["FileStoreName"])
            if text is None:
                print("文件内容获取失败")
                return None
            print("文件内容长度:", len(text))
            #将文件内容上传
            print(create_file_content(file["ID"], text))
@@ -120,5 +129,5 @@ def get_Kbase_server_id():
        print(f"请求失败，状态码: {response.status_code}, 错误信息: {response.text}")
        return None
-# if __name__ == "__main__":
+if __name__ == "__main__":
-#     get_file_add_convert()
+    get_file_add_convert()