diff --git a/dsLightRag/Util/DocxUtil.py b/dsLightRag/Util/DocxUtil.py index 02439bae..afcb4909 100644 --- a/dsLightRag/Util/DocxUtil.py +++ b/dsLightRag/Util/DocxUtil.py @@ -8,7 +8,9 @@ def get_docx_content_by_pandoc(docx_file): content = "" # output_file 设置为临时目录下的uuid.md file_name = uuid.uuid4().hex - temp_markdown = os.path.join('./static/markdown/', file_name + '.md') + # 将docx_file去掉扩展名 + prefix = docx_file.split(".")[0] + temp_markdown = os.path.join('./static/markdown/', prefix + '.md') # 调用pandoc将docx文件转换成markdown os.mkdir("./static/Images/" + file_name) subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown, @@ -42,5 +44,5 @@ def get_docx_content_by_pandoc(docx_file): content += line.strip().replace("**", "") + "\n" # 删除临时文件 output_file - os.remove(temp_markdown) - return content.replace("\n\n", "\n").replace("\\","") + #os.remove(temp_markdown) + return content.replace("\n\n", "\n").replace("\\", "")