You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

34 lines
883 B

"""
conda activate rag
pip install pypandoc
"""
import pypandoc
def docx_to_latex(docx_path):
    r"""Convert a Word document to LaTeX text with simplified math and image markers.

    The file is converted with ``pypandoc.convert_file(docx_path, 'latex')``
    and the resulting text is post-processed:

    * display math ``\[ ... \]`` is rewritten as ``$$ ... $$``;
    * inline math ``\( ... \)`` is rewritten as ``$ ... $``;
    * each ``\includegraphics`` command (with or without an ``[options]``
      group) is replaced by a numbered placeholder ``【图片N】``, numbered
      from 1 in order of appearance.

    Args:
        docx_path: Path of the document passed to ``pypandoc.convert_file``.

    Returns:
        The post-processed LaTeX source as a single string.
    """
    import itertools
    import re

    latex_content = pypandoc.convert_file(docx_path, 'latex')

    # Rewrite math delimiters. Without DOTALL, '.' stops at a newline, so a
    # display-math block spread over several lines would be left unconverted.
    latex_content = re.sub(
        r'\\\[(.*?)\\\]', r'$$\1$$', latex_content, flags=re.DOTALL
    )
    latex_content = re.sub(r'\\\((.*?)\\\)', r'$\1$', latex_content)

    # Replace every image inclusion with a numbered 【图片N】 placeholder.
    image_numbers = itertools.count(1)

    def replacer(match):
        # The closing bracket is part of the placeholder format.
        return f'【图片{next(image_numbers)}】'

    # The [options] group is optional in LaTeX, so a bare
    # \includegraphics{path} must be matched as well.
    return re.sub(
        r'\\includegraphics(?:\[[^\]]*\])?\{[^}]*\}',
        replacer,
        latex_content,
    )
latex_content = docx_to_latex('带公式的WORD文档.docx')

# Print each line of the converted text with surrounding whitespace trimmed,
# skipping lines that are empty after trimming.
for line in latex_content.split('\n'):
    stripped = line.strip()
    if not stripped:
        continue
    print(stripped)