|
|
|
@ -3,7 +3,7 @@ import subprocess
|
|
|
|
|
import uuid
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_docx_content_by_pandoc(docx_file, output_file=None):
|
|
|
|
|
def get_docx_content_by_pandoc(docx_file, output_file=None, extract_media=None):
|
|
|
|
|
# 最后拼接的内容
|
|
|
|
|
content = ""
|
|
|
|
|
# output_file 设置为临时目录下的uuid.md
|
|
|
|
@ -12,9 +12,11 @@ def get_docx_content_by_pandoc(docx_file, output_file=None):
|
|
|
|
|
prefix = docx_file.split(".")[0].split("/")[-1]
|
|
|
|
|
temp_markdown = os.path.join('./static/markdown/', prefix + '.md')
|
|
|
|
|
# 调用pandoc将docx文件转换成markdown
|
|
|
|
|
os.mkdir("./static/Images/" + file_name)
|
|
|
|
|
if extract_media is None:
|
|
|
|
|
os.mkdir("./static/Images/" + file_name)
|
|
|
|
|
extract_media = "./static/Images/"
|
|
|
|
|
subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown,
|
|
|
|
|
'--extract-media=./static/Images/' + file_name])
|
|
|
|
|
'--extract-media=' + extract_media + file_name])
|
|
|
|
|
# 读取然后修改内容,输出到新的文件
|
|
|
|
|
img_idx = 0 # 图片索引
|
|
|
|
|
with open(temp_markdown, 'r', encoding='utf-8') as f:
|
|
|
|
|