|
|
import asyncio
|
|
|
import os
|
|
|
|
|
|
from Util.DocxUtil import get_docx_content_by_pandoc
|
|
|
|
|
|
|
|
|
async def main(
    file_path: str = "ShiTi/Docx/《动能定理》巩固练习.docx",
    output_md_path: str = "结果.md",
) -> None:
    """Convert a .docx document to Markdown via ``get_docx_content_by_pandoc``.

    Both paths used to be hard-coded in the body; they are now parameters
    whose defaults reproduce the original behaviour, so ``main()`` with no
    arguments does exactly what it did before.

    Args:
        file_path: Path of the .docx file to process.
        output_md_path: Second argument forwarded to the converter. Judging
            by the default value this is presumably the Markdown output
            path -- confirm against ``Util.DocxUtil``.
    """
    # Convert the docx to Markdown.
    # NOTE(review): the helper is invoked without ``await``, so it is assumed
    # to be a plain synchronous function; if it is actually a coroutine
    # function it must be awaited here -- verify in Util.DocxUtil.
    get_docx_content_by_pandoc(file_path, output_md_path)
|
|
|
|
|
|
|
|
|
# """
|
|
|
# 修正一下MinerU生成的Latex中,如果是数字加圆圈的样式 \textcircled{1},
|
|
|
# 无法在Typora或者PyCharm中显示的问题,改成兼容性更强的 \enclose{circle}{1}
|
|
|
# """
|
|
|
# path = r'../output/' + fileName + '/auto'
|
|
|
# finalName = path + r'/' + fileName + '.md'
|
|
|
# formatted_content = ''
|
|
|
# with open(finalName, 'r', encoding='utf-8') as f:
|
|
|
# content = f.read()
|
|
|
# content = content.replace(r'\textcircled', r'\enclose{circle}')
|
|
|
# # 按【题型】分割试题
|
|
|
# question_types = ["不定项选择", "单选题", "多选题", "填空题", "判断题", "完型填空题", "计算题"]
|
|
|
#
|
|
|
# # 按 【题型】 分隔开
|
|
|
# content = content.replace("\n\n", "\n")
|
|
|
# # 从头开始找,找到第一个【题型】
|
|
|
# content = content[content.find('【题型】'):]
|
|
|
#
|
|
|
# questions = content.split('【题型】')
|
|
|
# idx = 0
|
|
|
# for q in questions:
|
|
|
# # 干掉空行
|
|
|
# if q.strip() == "" or q == '\n':
|
|
|
# continue
|
|
|
# # 如果q是以 question_types 中某个字符开头的,则在完成这个字符串后,换行输出
|
|
|
# for x in question_types:
|
|
|
# if q.startswith(x):
|
|
|
# q = q.replace(" ", "")
|
|
|
# # q的x后面第一个字符是不是换行符\n,如果 不是,则添加一个\n
|
|
|
# if q[q.index(x) + len(x)] != '\n':
|
|
|
# q = q.replace(x, x + '\n')
|
|
|
# break
|
|
|
#
|
|
|
# q = '【题型】' + q
|
|
|
# formatted_content = formatted_content + q + '\n'
|
|
|
#
|
|
|
# with open(path + r'/测试.md', 'w', encoding='utf-8') as f:
|
|
|
# f.write(formatted_content)
|
|
|
# # 将path目录下的images目录,整体拷贝到 output下
|
|
|
# if os.path.exists(output_dir + r'/images'):
|
|
|
# shutil.rmtree(output_dir + r'/images')
|
|
|
# shutil.copytree(path + r'/images', output_dir + r'/images')
|
|
|
# # 删除path目录下
|
|
|
# # shutil.rmtree(path)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine, then let asyncio
    # create an event loop, run the coroutine to completion and close the loop.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
|