'commit'

6 days ago · b6d2c1be05
parent 1e366f1970
commit b6d2c1be05
3 changed files with 16 additions and 10 deletions
--- a/dsLightRag/Topic/ShiJi/kv_store_doc_status.json
+++ b/dsLightRag/Topic/ShiJi/kv_store_doc_status.json
--- a/dsLightRag/Util/DocxUtil.py
+++ b/dsLightRag/Util/DocxUtil.py
@@ -20,6 +20,7 @@ handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -
 logger.addHandler(handler)
 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

+
 def resize_images_in_directory(directory_path, max_width=640, max_height=480):
    """
    遍历目录下所有图片并缩放到指定尺寸
@@ -55,6 +56,7 @@ def resize_images_in_directory(directory_path, max_width=640, max_height=480):

 import hashlib

+
 def calculate_docx_md5(docx_file_path):
    """
    计算docx文件的MD5哈希值
@@ -73,6 +75,8 @@ def calculate_docx_md5(docx_file_path):

    # 返回16进制格式的哈希值
    return md5_hash.hexdigest()
+
+
 def get_docx_content_by_pandoc(docx_file):
    # 最后拼接的内容
    content = ""
@@ -83,6 +87,8 @@ def get_docx_content_by_pandoc(docx_file):
    prefix = docx_file.split(".")[0].split("/")[-1]
    temp_markdown = os.path.join('./static/markdown/', prefix + '.md')
    # 调用pandoc将docx文件转换成markdown
+    path = "./static/Images/" + md5_value
+    if not os.path.exists(path):
        os.mkdir("./static/Images/" + md5_value)
    subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown,
                    '--extract-media=./static/Images/' + md5_value])
--- a/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc
+++ b/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc