import asyncio
import os

from Util.DocxUtil import get_docx_content_by_pandoc

# Defaults used when the script is run directly. Both are relative paths, so
# they resolve against the current working directory.
DEFAULT_DOCX_PATH = "ShiTi/Docx/《动能定理》巩固练习.docx"
DEFAULT_MD_PATH = '结果.md'


async def main(
    file_path: str = DEFAULT_DOCX_PATH,
    output_path: str = DEFAULT_MD_PATH,
) -> None:
    """Convert a .docx document to Markdown through the project's pandoc helper.

    Args:
        file_path: Path of the .docx file to process. Defaults to the sample
            document this script was originally hard-coded for.
        output_path: Second argument handed to ``get_docx_content_by_pandoc``
            (presumably the Markdown file to write -- confirm against the
            helper). Defaults to the original hard-coded file name.
    """
    # Convert the docx to Markdown.
    # NOTE(review): the helper is called without ``await``, exactly as before.
    # If it is actually a coroutine function it must be awaited -- confirm in
    # Util.DocxUtil.
    get_docx_content_by_pandoc(file_path, output_path)

    # NOTE: a post-processing pass for MinerU-generated Markdown used to live
    # here as commented-out code. It was never runnable from this file (it
    # relied on `fileName`, `output_dir` and `shutil`, none of which are
    # defined or imported here), so only its intent is kept:
    #   1. Read `../output/<fileName>/auto/<fileName>.md` and replace
    #      `\textcircled` with `\enclose{circle}`, because circled numbers
    #      such as `\textcircled{1}` do not render in Typora or PyCharm.
    #   2. Collapse blank lines ("\n\n" -> "\n"), drop everything before the
    #      first `【题型】` marker, and split the text on that marker.
    #   3. Skip empty chunks. For a chunk starting with one of the question
    #      types ("不定项选择", "单选题", "多选题", "填空题", "判断题",
    #      "完型填空题", "计算题"), remove spaces and make sure the type name
    #      is followed by a newline; then re-prefix the chunk with `【题型】`.
    #   4. Write the result to `测试.md` in the same directory.
    #   5. Replace `<output_dir>/images` with a copy of the `images`
    #      directory found next to the Markdown file (deleting the source
    #      directory afterwards was itself disabled).


if __name__ == "__main__":
    asyncio.run(main())