From c3b33d3dc8d726bb5c9edfbf97b11fece696b229 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 10 Jul 2025 15:29:13 +0800 Subject: [PATCH] 'commit' --- dsLightRag/ST3_DocxToMd.py | 50 -------------------------------------- 1 file changed, 50 deletions(-) diff --git a/dsLightRag/ST3_DocxToMd.py b/dsLightRag/ST3_DocxToMd.py index 732b69e8..853412c8 100644 --- a/dsLightRag/ST3_DocxToMd.py +++ b/dsLightRag/ST3_DocxToMd.py @@ -2,61 +2,11 @@ import asyncio import os from Util.DocxUtil import get_docx_content_by_pandoc - - async def main(): # 要处理的文件路径 file_path = "ShiTi/Docx/《动能定理》巩固练习.docx" - # 转换docx为md get_docx_content_by_pandoc(file_path,'结果.md') - - # """ - # 修正一下MinerU生成的Latex中,如果是数字加圆圈的样式 \textcircled{1}, - # 无法在Typora或者PyCharm中显示的问题,改成兼容性更强的 \enclose{circle}{1} - # """ - # path = r'../output/' + fileName + '/auto' - # finalName = path + r'/' + fileName + '.md' - # formatted_content = '' - # with open(finalName, 'r', encoding='utf-8') as f: - # content = f.read() - # content = content.replace(r'\textcircled', r'\enclose{circle}') - # # 按【题型】分割试题 - # question_types = ["不定项选择", "单选题", "多选题", "填空题", "判断题", "完型填空题", "计算题"] - # - # # 按 【题型】 分隔开 - # content = content.replace("\n\n", "\n") - # # 从头开始找,找到第一个【题型】 - # content = content[content.find('【题型】'):] - # - # questions = content.split('【题型】') - # idx = 0 - # for q in questions: - # # 干掉空行 - # if q.strip() == "" or q == '\n': - # continue - # # 如果q是以 question_types 中某个字符开头的,则在完成这个字符串后,换行输出 - # for x in question_types: - # if q.startswith(x): - # q = q.replace(" ", "") - # # q的x后面第一个字符是不是换行符\n,如果 不是,则添加一个\n - # if q[q.index(x) + len(x)] != '\n': - # q = q.replace(x, x + '\n') - # break - # - # q = '【题型】' + q - # formatted_content = formatted_content + q + '\n' - # - # with open(path + r'/测试.md', 'w', encoding='utf-8') as f: - # f.write(formatted_content) - # # 将path目录下的images目录,整体拷贝到 output下 - # if os.path.exists(output_dir + r'/images'): - # shutil.rmtree(output_dir + r'/images') - # shutil.copytree(path + r'/images', output_dir + r'/images') - # # 删除path目录下 - # # shutil.rmtree(path) - - if __name__ == "__main__": asyncio.run(main())