"""
Convert a Word document that contains formulas into LaTeX text.

Setup:
    conda activate rag
    pip install pypandoc
"""

import re

import pypandoc

# Matches a LaTeX display-math span written as \[ ... \].
# re.DOTALL lets "." cross newlines, so formulas whose LaTeX spans several
# lines are rewritten too instead of being silently skipped.
_DISPLAY_MATH = re.compile(r'\\\[(.*?)\\\]', re.DOTALL)


def docx_to_latex(docx_path: str) -> str:
    """Convert a .docx file to LaTeX, using ``$$...$$`` for display math.

    The document is converted with ``pypandoc.convert_file(..., 'latex')``;
    every ``\\[ ... \\]`` span in the result is then rewritten as
    ``$$ ... $$``. The formula body itself is left untouched.

    Args:
        docx_path: Path of the Word document to convert.

    Returns:
        The LaTeX text with display-math delimiters rewritten.
    """
    latex_content = pypandoc.convert_file(docx_path, 'latex')
    return _DISPLAY_MATH.sub(r'$$\1$$', latex_content)


if __name__ == '__main__':
    # Sample document that sits next to this script; the printed output
    # contains the formulas in LaTeX format.
    latex_content = docx_to_latex('带公式的WORD文档.docx')
    print(latex_content)