From 4499c499127343285c1d25df061864e9a59028f8 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 10 Jul 2025 13:16:57 +0800 Subject: [PATCH] 'commit' --- dsLightRag/ShiTi/T3_DocxToMd.py | 5 +- dsLightRag/Util/LightRagUtil.py | 2 +- .../__pycache__/LightRagUtil.cpython-310.pyc | Bin 5302 -> 5302 bytes .../output/物理Docx整理后的结果.md | 80 ++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 dsLightRag/output/物理Docx整理后的结果.md diff --git a/dsLightRag/ShiTi/T3_DocxToMd.py b/dsLightRag/ShiTi/T3_DocxToMd.py index 8813d8b7..a23cf7fe 100644 --- a/dsLightRag/ShiTi/T3_DocxToMd.py +++ b/dsLightRag/ShiTi/T3_DocxToMd.py @@ -56,7 +56,7 @@ async def main(): vision_model_func=vision_model_func, embedding_func=embedding_func, ) - + # 需要注意:注释掉将整理出来的文档内容插入到LightRAG的代码。 await rag.process_document_complete( file_path=file_path, output_dir=output_dir, @@ -93,6 +93,9 @@ async def main(): format_exam_content(raw_text=content, output_path=os.path.join(output_dir, "物理Docx整理后的结果.md")) logger.info(f"内容整理完成,保存至: {os.path.join(output_dir, '物理Docx整理后的结果.md')}") + # 将path目录下的images目录,整体拷贝到 output下 + shutil.copytree(path + r'/images', output_dir + r'/images') + if __name__ == "__main__": asyncio.run(main()) diff --git a/dsLightRag/Util/LightRagUtil.py b/dsLightRag/Util/LightRagUtil.py index 5a6de628..12d6100a 100644 --- a/dsLightRag/Util/LightRagUtil.py +++ b/dsLightRag/Util/LightRagUtil.py @@ -186,7 +186,7 @@ def create_vision_model_func(llm_model_func): def format_exam_content(raw_text, output_path): client = OpenAI( api_key=LLM_API_KEY, - base_url=LLM_BASE_URL, + base_url=LLM_BASE_URL ) """ 将OCR识别的原始试卷内容格式化为标准试题格式 diff --git a/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc index 5ee61c5fb1ae8ef2af684a38007c6f8334da0c9b..29ee8d0e891eba47626d6831d6d6756f0225a22b 100644 GIT binary patch delta 22 ccmdm{xlNNdpO=@50SFv?@-rrEV!Z delta 22 ccmdm{xlNNdpO=@50SLOh@-rrG