diff --git a/dsRag/Test/TestGongShi.py b/dsRag/Test/TestGongShi.py index 9e9cae25..387f6069 100644 --- a/dsRag/Test/TestGongShi.py +++ b/dsRag/Test/TestGongShi.py @@ -5,6 +5,7 @@ pip install pypandoc import pypandoc + def docx_to_latex(docx_path): latex_content = pypandoc.convert_file(docx_path, 'latex') import re @@ -12,13 +13,20 @@ def docx_to_latex(docx_path): latex_content = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', latex_content) # 替换图片路径为【图片X】格式 img_count = 1 + def replacer(match): nonlocal img_count result = f'【图片{img_count}】' img_count += 1 return result + latex_content = re.sub(r'\\includegraphics\[.*?\]\{.*?\}', replacer, latex_content) return latex_content + latex_content = docx_to_latex('带公式的WORD文档.docx') -print(latex_content) # 包含LaTeX格式公式 \ No newline at end of file + +# 遍历字符串的每一行 +for line in latex_content.split('\n'): + if len(line.strip()) > 0: + print(line.strip())