|
|
|
@ -5,6 +5,7 @@ pip install pypandoc
|
|
|
|
|
|
|
|
|
|
import pypandoc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def docx_to_latex(docx_path):
    r"""Convert a Word document to LaTeX text with simplified math and images.

    The file is converted with ``pypandoc.convert_file(docx_path, 'latex')``
    and the resulting text is post-processed in two steps:

    * display math ``\[ ... \]`` is rewritten as ``$$ ... $$``;
    * every ``\includegraphics`` command (with or without an ``[options]``
      part) is replaced by a numbered placeholder ``【图片N】``, counting
      from 1 in order of appearance.

    Args:
        docx_path: Path of the .docx file; passed unchanged to pypandoc.

    Returns:
        The post-processed LaTeX source as a single string.
    """
    # Function-scope imports keep this block self-contained.
    import itertools
    import re

    latex_content = pypandoc.convert_file(docx_path, 'latex')

    # Display math: \[ ... \]  ->  $$ ... $$
    # DOTALL lets one equation span several lines (matrices, aligned rows).
    # The lookbehind stops a line break with optional spacing, e.g. \\[2pt],
    # from being mistaken for the opening \[ delimiter.
    latex_content = re.sub(
        r'(?<!\\)\\\[(.*?)\\\]',
        r'$$\1$$',
        latex_content,
        flags=re.DOTALL,
    )

    # Replace each image include with a 【图片X】 placeholder.
    # The [options] group is optional so that a bare \includegraphics{path}
    # is numbered as well.
    numbering = itertools.count(1)
    latex_content = re.sub(
        r'\\includegraphics(?:\[.*?\])?\{.*?\}',
        lambda _match: f'【图片{next(numbering)}】',
        latex_content,
    )

    return latex_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Convert the sample document and dump the raw result.
latex_content = docx_to_latex('带公式的WORD文档.docx')
print(latex_content)  # includes the formulas in LaTeX format

# Walk over the string line by line, printing each non-blank line with
# its surrounding whitespace removed.
for line in latex_content.split('\n'):
    text = line.strip()
    if text:
        print(text)
|
|
|
|
|