main
HuangHai 2 weeks ago
parent 044466992b
commit dc7189af7a

@@ -7,7 +7,7 @@ async def main():
# 要处理的文件路径
file_path = "Docx/《动能定理》巩固练习.docx"
# 转换docx为md
get_docx_content_by_pandoc(file_path, '../Word转试题.md')
get_docx_content_by_pandoc(file_path, '../Word转试题.md',extract_media='../static/Images/')
if __name__ == "__main__":
asyncio.run(main())

@@ -3,7 +3,7 @@ import subprocess
import uuid
def get_docx_content_by_pandoc(docx_file, output_file=None):
def get_docx_content_by_pandoc(docx_file, output_file=None, extract_media=None):
# 最后拼接的内容
content = ""
# output_file 设置为临时目录下的uuid.md
@@ -12,9 +12,11 @@ def get_docx_content_by_pandoc(docx_file, output_file=None):
prefix = docx_file.split(".")[0].split("/")[-1]
temp_markdown = os.path.join('./static/markdown/', prefix + '.md')
# 调用pandoc将docx文件转换成markdown
os.mkdir("./static/Images/" + file_name)
if extract_media is None:
os.mkdir("./static/Images/" + file_name)
extract_media = "./static/Images/"
subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown,
'--extract-media=./static/Images/' + file_name])
'--extract-media=' + extract_media + file_name])
# 读取然后修改内容,输出到新的文件
img_idx = 0 # 图片索引
with open(temp_markdown, 'r', encoding='utf-8') as f:

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Loading…
Cancel
Save