diff --git a/dsLightRag/Test/TestPandoc.py b/dsLightRag/T1_GetDocxContent.py similarity index 65% rename from dsLightRag/Test/TestPandoc.py rename to dsLightRag/T1_GetDocxContent.py index 24619bc6..9f73f4bf 100644 --- a/dsLightRag/Test/TestPandoc.py +++ b/dsLightRag/T1_GetDocxContent.py @@ -2,7 +2,7 @@ from Util.DocxUtil import * if __name__ == '__main__': # docx文件路径 - docx_file = '../static/Txt/化学方程式_CHEMISTRY_1.docx' + docx_file = 'static/Txt/化学方程式_CHEMISTRY_1.docx' # 整合最终的拼接完的文本 content = get_docx_content_by_pandoc(docx_file) diff --git a/dsLightRag/Util/DocxUtil.py b/dsLightRag/Util/DocxUtil.py index 3c8dd862..face4064 100644 --- a/dsLightRag/Util/DocxUtil.py +++ b/dsLightRag/Util/DocxUtil.py @@ -30,11 +30,11 @@ def get_docx_content_by_pandoc(docx_file): content = "" # output_file 设置为临时目录下的uuid.md file_name = uuid.uuid4().hex - temp_markdown = os.path.join('../static/markdown/', file_name + '.md') + temp_markdown = os.path.join('./static/markdown/', file_name + '.md') # 调用pandoc将docx文件转换成markdown - os.mkdir("../static/Images/" + file_name) + os.mkdir("./static/Images/" + file_name) subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown, - '--extract-media=../static/Images/' + file_name]) + '--extract-media=./static/Images/' + file_name]) # 读取然后修改内容,输出到新的文件 wmf_idx = 0 # wmf索引 img_idx = 0 # 图片索引 diff --git a/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc index 77dd3f86..11e693d0 100644 Binary files a/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc and b/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc differ diff --git a/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image1.wmf b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image1.wmf new file mode 100644 index 00000000..9c4be07d Binary files /dev/null and b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image1.wmf differ diff --git a/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image2.wmf b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image2.wmf new file mode 100644 index 00000000..a17f8e26 Binary files /dev/null and b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image2.wmf differ diff --git a/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image3.png b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image3.png new file mode 100644 index 00000000..1c3ba28e Binary files /dev/null and b/dsLightRag/static/Images/d042881647c64dd7b04d1ec4a55d842f/media/image3.png differ